{
    "schemaVersion": 1,
    "lane": "markerpdf",
    "priority": 3,
    "upstream": {
        "name": "sddai/markerPDF",
        "url": "https://github.com/sddai/markerPDF",
        "commit": "da6a2f5c9a7b1e92c82d85fbcf3680a79dd28a34",
        "license": "GPL-3.0-or-later",
        "architecture": "Python PDF-to-Markdown extraction pipeline using pdftext, pypdfium2, Surya OCR/layout models, table/equation/image extraction, and Markdown post-processing."
    },
    "benchmarkDenominator": {
        "status": "cloned-static-inventory-plus-native-runner-verifier-evidence",
        "total": 78,
        "inventorySummary": "78 tracked upstream repository paths inventoried from the cloned repository, with 78 targeted upstream lane-relevant paths inspected (workflows, benchmark/scoring scripts, committed example outputs, cleaner/postprocessor/layout/OCR/image/PDF-image-render/equation/table/output/settings/schema/debug/source/model utility modules, marker_app.py preview helpers, marker/logger.py logging setup, run_marker_app.py Streamlit runtime launcher, marker_server.py API/upload helpers, top-level batch, single-document, and chunk conversion scripts, and support scripts), plus the locked tabled-pdf 0.1.4 table-detection/recognition/assignment/heuristic-column/formatting helper source for merge and supplied table-recognition semantics, 6 README benchmark document identifiers, catalog /PageLabels number-tree label extraction, 2 actual CI benchmark PDF/reference pairs from benchmark_data_short.zip, Type0 Identity-H/V font CMap fallback, Form XObject invocation, nested Form XObject resource font scoping, page-tree inherited resource font lookup, PDF 1.5 object-stream/xref parser recovery, ToUnicode source-width fallback, malformed ToUnicode fallback, Image XObject fallback exclusion, pdftext dictionary-core metadata/options preservation, and LZWDecode stream-filter decoding, native PDF outline/Info metadata extraction, and native PDF named-destination outline resolution, and PDF link annotation URI boundaries, and DCTDecode JPEG/image stream exclusion, and CCITTFaxDecode/CCF image-filter exclusion; 0 committed Python unit test files found, and catalog XMP metadata extraction with trailer /Info fallback, CIDFont /W and /DW width metrics for text-advance grouping, inline image BI/ID/EI payload exclusion before text-token parsing, CIDSet subset default-width metrics for embedded CIDFont glyphs, cyclic Form XObject resource re-entry protection, annotation /AP /N appearance stream text extraction, indirect page-tree /Kids leaf counting, and device/Indexed/ICCBased image stream fallback exclusion, LZWDecode DecodeParms boundary handling, DCTDecode CMYK Adobe-transform preview planning, and AcroForm field flags/default appearance extraction, stream filter-chain DecodeParms arrays, PDF/A OutputIntent metadata, and remote GoTo outline actions, and JPXDecode/JBIG2Decode image-filter boundaries, hybrid xref /Prev object overrides, xref free-generation suppression, encrypted-PDF fail-closed preflight, and DocMDP signature permissions, CIDFont vertical /W2 metrics, page rotation/UserUnit/box preview geometry, and StructTreeRoot MCID reading order, marked-content /ActualText and /Alt replacement, optional-content layer visibility, and Type3 CharProc width handling, trailer /ID fingerprint metadata, catalog language and viewer-preference metadata, and catalog OpenAction safety-review metadata, length-bounded ASCIIHex/RunLength stream filters, destination Fit/XYZ page-view metadata, XFA packet review metadata, and EmbeddedFiles name-tree attachment metadata, and annotation border/popup review metadata, and native FontDescriptor /Flags styled-span extraction, and page /Dur /Trans /AA transition/action metadata, and AcroForm /CO calculation order and field/widget /AA JavaScript action review metadata, and PDF Portfolio /Collection, Filespec /CI, and PieceInfo attachment metadata, and AcroForm current/default value-state metadata from /V, /DV, /I, /Opt, and widget /AS, and ToUnicode CMap comment stripping, and indirect name-tree destination parsing, and annotation geometry review metadata, and JavaScript action-chain /Next cycle/depth safety review metadata, and latest startxref xref chain precedence before stale object-stream rebuild fallback, and TJ array PDF comment handling before WordPress text extraction, and Sound/Movie annotation review metadata without media playback or payload text leakage, and page-box inheritance/UserUnit preview rules for MarkerAppPreview, and indirect numeric DecodeParms predictor parameters for Flate/LZW stream filters, and ICCBased image color-space/soft-mask metadata before RGB preview planning, and Standard encryption permission metadata without decryption, and PDF text rendering mode visibility/clipping rules, and embedded-file Params CheckSum review metadata, and XMP/Info date timezone normalization, and signature field seed-value /SV constraints plus /Lock field-scope metadata, and linearized PDF /H hint-table byte-range exclusion, and object-stream indirect /Length /Filter /N /First recovery, and page /PieceInfo plus tagged PDF /UserProperties review metadata, and optional-content /AS usage application plus /Intent state filtering, and stream-filter error-boundary exclusion, and indirect FontDescriptor field resolution, and StructTreeRoot RoleMap tagged-content mapping, and UTF-16 XDP XFA stream packet metadata, and stale PDF stream /Length recovery with bounded endstream terminators, and latest trailer /Root catalog generation recovery, and Standard/MacRoman/Symbol simple-font encoding, and PDFDocEncoding trailer Info metadata decoding, and cyclic page-tree /Kids duplicate-leaf/resource guard handling, and indirect stream-filter name arrays with null filter entries, and undeclared XMP metadata encoding fallback with Info field completion, and rotated text-markup QuadPoints mapping through page boxes before supplied pdftext review spans, and CIDFont descriptor-only default-width text advance grouping, and xref-stream /Prev generation rows with zero-width generation entries prefer exact byte offsets before stale previous-generation rows, preserving shared resources while excluding stale page text without Python/models/external PDF tools, and Current PDF xref-stream trailer dictionaries at the latest startxref supply /Root, /Info, and /ID review metadata before stale textual trailers, so WordPress imports get current XMP title, Info author/producer/date fallback, and trailer-ID fingerprints while excluding stale trailer metadata and XMP packet text from visible Gutenberg paragraphs without Python/models/external PDF tools, and Page-box rectangle arrays with indirect numeric operands resolve before marker_app-style WordPress preview sizing, preserving inherited rotation, page-local UserUnit, and rendered image dimensions without Python/models/external PDF tools, and Invoked Form XObjects apply caller graphics-state cm plus form /Matrix to text positions and clip form-local text outside /BBox before Gutenberg paragraphs without Python/models/external PDF tools, and Supplied-document conversion merges adjacent table layout boxes before protecting Formula and Picture regions, preserving one table block while excluding duplicate equation and image placeholders without Python/models/external PDF tools, and image /Decode plus /ImageMask stencil preview mapping before RGB media review, and annotation appearance Form XObject resource/Matrix/BBox boundaries before WordPress text import, and AcroForm calculation order/signature state/lock review boundaries without action execution, and Type0 /Encoding CMap code-space/CID fallback width segmentation before WordPress paragraph extraction, and AcroForm XFA signature widget annotation-state review boundaries, and security preflight import-decision boundaries for encrypted PDFs and signature ByteRanges, and Filespec /PieceInfo /Private stream review-only fallback-text exclusion, and rich-media annotation Popup plus /A /AA /Next action review boundaries, and bounded PageLabels number-tree /Limits plus indirect ViewerPreferences operand review boundaries, and indirect page Contents array resource-stack preservation before fallback stream scanning, and catalog /MarkInfo plus page /AF Filespec review metadata, and hybrid xref table free-entry conflict precedence over companion /XRefStm rows, and AcroForm /DR default-resource font review metadata for /DA appearance strings, and catalog OpenAction chained /Next review metadata, and catalog associated-file Portfolio /Collection propagation plus per-file /CI checksum match-state metadata, and CIDFont /W decimal numeric width operands before ToUnicode text grouping, and soft-mask /Decode opacity metadata before RGB preview compositing, and page transition/action rows with bounded PageLabels plus catalog viewer-preference review metadata, and encrypted PDF metadata source priority before XMP Info and OutputIntent review fields, and RichMedia annotation action target boundaries that keep nested /A /AA /Next /AN references review-only, and stream-filter chain recovery fails closed on unresolved or malformed DecodeParms while recovering valid chained predictor streams, and forced-OCR table-detection pages route through merged table layout boxes without stale pdftext table-line predictions, and link/text-markup annotation destination plus /A /AA action review metadata without PDF action execution, and associated Filespec /PieceInfo /Private stream Params CheckSum metadata for catalog /AF review-only attachments, and page graphics-state cm text-position transforms before native text line grouping, and marker_app preview CropBox clipping to MediaBox before rotation/UserUnit sizing with zero-area review metadata, and predefined Type0 CMap -V/-H writing-mode detection with ToUnicode and CIDFont/CIDSet width boundaries, and current xref-stream trailer /Encrypt metadata-source boundaries with EncryptMetadata false XMP preservation plus encrypted Info/OutputIntent/text suppression, and composite navigation review metadata combining outline destination views, catalog OpenAction review rows, PageLabels, and target-page transitions, and StructTreeRoot RoleMap MCID replay precedence over catalog /Threads fallback when tagged content is recoverable, and object-valued Type0 /Encoding name resolution before CIDSet/default-width grouping and FontDescriptor flag review, and xref-stream type-2 object-stream member-index repair when /W omits the index field, and token-aware direct stream owner lookup that ignores fake object headers inside PDF strings before stream payloads, and page-local UserUnit scaling for rotated text-markup QuadPoints before supplied marker/pdftext span review, and AcroForm widget normal appearance streams selected by /AS as non-executing review metadata while /V remains authoritative, and supplied OCR prediction objects with text_lines/lines are unwrapped before forced table recognition so OCR table text replaces stale pdftext table lines without Python/models/external PDF tools, and Indexed ICCBased image palettes with JBIG2 preview-only filters plus soft-mask Decode opacity are represented before RGB preview planning without Python/models/external PDF tools, and fallback stream decoding uses current xref-selected direct object bodies with top-level stream dictionaries so stale/free filtered streams and nested stream-looking payload tokens are excluded, and catalog PieceInfo private Metadata/OutputIntents stay review-only under catalog metadata instead of promoting nested private XMP or PDF/A output-intent data to document metadata roots, and outline rows resolved through destination name trees inherit target page duration, transition, and review-only page action metadata without executing actions or leaking action target text and Screen/Rendition /P and /SP /MH//BE playback policy dictionaries as non-executing review metadata and Separation/DeviceN alternate image color spaces with CCITT/JPX preview-only filter metadata before RGB review and RichMedia GoToE embedded attachment actions with target dictionaries and chained JavaScript review metadata, and encrypted PDF Standard permission preflight boundaries separating permission flags from decryption availability, and ToUnicode CMap declared bfchar/bfrange row-count boundaries, and Portfolio collection schema field-value review metadata, and annotation appearance plus Sound stream review metadata, and text-showing operator source-space word-spacing boundaries for bidi ToUnicode replacement text, and indirect simple-font FirstChar width resolution before text-advance grouping, and current hybrid xref table direct-generation precedence over companion XRefStm stale compressed members, and signature Reference FieldMDP/UR3 transform review metadata, and Screen action /AN detached target review boundaries, and AcroForm non-JavaScript field/widget action review boundaries, and associated Filespec indirect PieceInfo private-stream exclusion, and pdftext table-line structure routing into native table cells The isolated page-tree /Contents boundary maps upstream page-local text extraction: page leaves are authoritative, /Contents does not inherit from /Pages ancestors, fallback stream scanning is blocked when a page tree exists without leaf contents, and inherited /Resources still decode child page fonts before WordPress paragraph rendering. The isolated stream-filter DecodeParms fail-closed slice maps present-but-malformed or unresolved known integer DecodeParms values so unsafe Flate/LZW streams are rejected before WordPress text-token parsing while valid predictor rows still decode natively. The isolated catalog destination metadata slice maps /Names /Dests name trees and legacy /Dests dictionaries into document-destination review metadata while preserving XMP title precedence, DocInfo fallback, and visible-text isolation., and current page Widget annotation URI/destination link promotion with hidden and detached widget exclusion, and image XObject SMask stream-filter boundary metadata before RGB preview planning, and AcroForm /XFA signature-like packet field/data-path review boundaries, and DCTDecode CMYK/YCCK image /Decode RGB preview boundaries, and RichMedia target FileSpec embedded-stream Params checksum/date review boundaries. The isolated xref-stream type-2 object-stream base slice maps malformed compressed-object rows that target the direct /ObjStm base itself: the current direct object-stream base remains authoritative while explicit type-2 member indexes still select compressed page objects, excluding decoy members and orphan fallback streams before WordPress text extraction. The isolated parser stream dictionary escape-boundary slice maps escaped top-level /Fil#74er and /Len#67th keys while ignoring fake /Filter, /Length, and /DecodeParms names inside literal strings, comments, arrays, and nested dictionaries before WordPress paragraph rendering. The isolated parser xref offset-owner boundary maps stream-owned fake xref table payload rejection so stale page trees cannot override current owner-boundary text before WordPress paragraph extraction. The isolated Indexed image Decode soft-mask boundary maps PDF Indexed color-space default Decode [0 hival], decoded-index hival clipping, and soft-mask alpha preview before RGB WordPress image review without raster execution. The isolated page font resource ToUnicode/width boundary maps nearest page /Resources font lookup so ancestor page-tree ToUnicode maps and CID widths do not leak into leaf pages with their own resources before WordPress text import. The isolated Type0 indirect CIDFont width boundary maps descendant /W arrays stored in indirect objects through Encoding CMap CIDs before WordPress text-gap grouping. The isolated object-stream filter owner boundary maps /Type /ObjStm carrier streams as compressed-object containers, excluding their decoded payloads from fallback visible text while preserving direct filtered stream extraction. The isolated xref object-stream filter-chain operand slice maps xref-selected object streams whose indirect Filter, DecodeParms, N, and First operands are recovered from compressed helper object streams before WordPress text extraction. The isolated xref-stream object-owner boundary maps direct stream /Length ownership so fake /Type /XRef object headers embedded inside another stream payload cannot own startxref or redirect current-base page extraction. The isolated forced-OCR merged-cell geometry slice maps supplied table-recognition cells through native row_ids/col_ids geometry so WordPress review metadata preserves colspan, rowspan, anchors, grid cells, and grid bboxes before Markdown formatting drops span occupancy. The accepted 2026-06-02 security/font/xref/table/form/image/outline/parser batch adds permission-handler review, indirect Type0 /DW width resolution, incremental free-entry xref suppression, spanning table header grid review, XFA signature widget review, ICC soft-mask Decode review, named-destination Fit operand normalization, and stream-owned DecodeParms owner boundaries. The accepted 2026-06-02 metadata/image/xref/parser/outline batch adds catalog Collection schema associated-file metadata, calibrated soft-mask review, incremental object-stream free repair, nested object-stream filter fail-closed fallback, and remote outline destination action review. The accepted nine-handoff current-base batch maps inline image Indexed/JBIG2/ImageMask review, multiline OCR table header folding, page associated-file checksum state, public-key recipient permission envelopes, indirect simple-font encoding/width operands, latest trailer Encrypt/ID precedence, xref-stream zero-width object-stream member-index review, AcroForm widget appearance/action cycle metadata, and article-thread bead navigation metadata without Python/models/external PDF tools. The accepted structure-tree metadata rebase maps catalog /StructTreeRoot language, RoleMap, Namespaces, structure elements, MCID/page links, alternate text, actual text, expansion text, IDs, classes, and revision review metadata without leaking review text into visible WordPress paragraphs. The accepted five-slice current-base batch maps page /StructParents ParentTree reading order, associated-file XMP/PDF-A provenance review, inline image filter-array abbreviation/null-entry boundaries, Type0 ToUnicode surrogate/CID width grouping, and hybrid xref /Prev underdeclared trailer-size repair without Python/models/external PDF tools. The accepted ColorKey image mask rebase maps raw PDF /Mask component ranges before /Decode-adjusted RGB preview values and records transparent/opaque sample review metadata without executing pypdfium, PIL, Python models, or external PDF tools. The accepted 2026-06-02 eight-slice current-base batch maps rotated rowspan table header grids, encrypted/signature permission digest reviews, AcroForm choice/rich-text submit/reset state, page article-thread/PieceInfo/StructTree marked-content review, metadata language OutputIntent associated-file review, CIDSet vertical surrogate widths, outline target page-review transition/thread enrichment, and Indexed soft-mask transfer functions without Python/models/external PDF tools. The accepted StructTree associated-file page-review slice maps StructElem /AF FileSpec provenance onto page MCID review rows while keeping catalog/page associated files separate and embedded payload/XMP/ICC bytes review-only. The accepted 2026-06-02 18:29Z current-base batch maps runtime benchmark API callbacks, outline destination action context, xref repair boundaries, security AcroForm permission actions, XFA widget current values, OCR polygon table geometry, catalog name-tree metadata bounds, and named color-space soft-mask image preview metadata without Python/models/external PDF tools. The accepted 2026-06-02 18:43Z current-base batch maps runtime conversion boundary planning, platform/embedded AcroForm action review, CID encoding widths, inline ImageMask preview rows, associated related-file metadata, outline name-tree Limits, annotation reply threads, xref-stream owner boundaries, DSS multi-signature matching, duplicate object-stream zero-width guards, and table grid-border assignment review without Python/models/external PDF tools. The accepted 2026-06-02 18:55Z nine-slice current-base batch maps benchmark score-file verification, AcroForm seed-lock action review, DeviceN soft-mask transfer preview, widget link promotion, current object-stream filter repair, portfolio PieceInfo review, public-key permission metadata, Type3 CMap/CIDSet width grouping, and hybrid generation-one page repair without Python/models/external PDF tools. The accepted 2026-06-02 19:07Z six-slice current-base batch maps convert.py/benchmark error review, JPX SMaskInData ColorKey precedence, encrypted signed-PDF ByteRange security review, xref object-stream carrier-generation replacement, long ToUnicode bfrange surrogate width grouping, and outline Fit-family action-chain metadata without Python/models/external PDF tools. The accepted 2026-06-02 19:08Z five-slice current-base batch maps stream-length startxref recovery, AcroForm widget appearance-state review, associated FileSpec XMP/PieceInfo review, page StructParents/thread/markup composition, and OCR span grid-cell table geometry without Python/models/external PDF tools. The accepted 2026-06-02 20:00Z current-base batch maps signature ByteRange revision/DSS VRI placement review, page StructParent user properties, and OCR border-conflict table grid review without Python/models/external PDF tools. The accepted 2026-06-02 20:10Z current-base batch maps parser filter-array dictionary fail-closed behavior, DeviceN JPX transfer boundaries, associated-file collection schema metadata, indirect CIDSet width generation boundaries, sparse xref-stream /Prev generation rows, and marker server config error planning without Python/models/external PDF tools. The accepted 2026-06-02 20:21Z current-base batch maps widget annotation ParentTree metadata, rowspanned table header-grid references, and certificate permission OpenAction review metadata without Python/models/external PDF tools. The accepted 2026-06-02 20:26Z current-base batch maps AcroForm FileSpec action-resource reviews, escaped name array parser boundaries, Indexed ColorKey transfer image metadata, Type0 CMap descriptor width grouping, outline structure destination context, and xref-stream generation Index metadata without Python/models/external PDF tools. The isolated parser name-array comment boundary maps PDF comments inside optional-content arrays so comment-only object references and escaped names do not enable hidden layers or leak review text before WordPress paragraph extraction. The accepted 2026-06-02 seven-handoff current-base batch maps Type3 CharProc Unicode fallback, supplied table span-grid section/caption context, page thread StructTree associated-file provenance, and FieldMDP ByteRange target coverage, plus non-denominator JPEG2000 ImageMask Decode preview, benchmark memory snapshot telemetry, and AcroForm submit/reset resource review. The isolated PDF/A associated name-tree metadata slice maps current catalog /Names /EmbeddedFiles rows into associated-file review metadata with root and attachment-local OutputIntent provenance while keeping payloads and profile bytes review-only. The accepted 2026-06-02 xref/parser/page/link batch maps object-stream stream-dictionary generation ownership, Prev hybrid Size root generation repair, malformed xref-stream Index repair, Form XObject StructTree clipping, and comment-aware array/dictionary/string token parsing, plus non-denominator local link target context and hybrid free-owner review rows. The accepted 2026-06-02 outline/form/xref/image batch maps structured outline destination action context, outline name-tree action structure review rows, AcroForm widget appearance resource actions, and xref object-stream Prev-free carrier precedence, plus non-denominator ICCBased soft-mask preview rows. The accepted 2026-06-02 metadata/xref/annotation/image/attachment batch maps trailer ID/language/viewer-preference metadata, linearized xref hint/startxref precedence, RichMedia Screen/Rendition selectors, DeviceN/Separation soft-mask Decode image review, and Portfolio associated-file PieceInfo checksum state without Python/models/external PDF tools. The accepted 2026-06-02 table/font/layout/parser batch maps rotated table header accessibility grids, Type3 color glyph resource widths, rotated multi-column layout ordering, and object-stream generation offset ownership without Python/models/external PDF tools. The accepted 2026-06-02 metadata/outline/xref/page/runtime batch maps XMP language and catalog MarkInfo review, direct named-destination Thread action context, current object-stream carrier rebuild before stale Prev generations, page associated files on marked-content Alt rows, and marker-server upload pagination error boundaries without Python/models/external PDF tools. The accepted 2026-06-02 output/runtime preview batch maps saved output artifact metadata plus runtime-only preview HTML while keeping image payload bytes out of visible WordPress content. The accepted 2026-06-02 attachment/font/table batch maps associated FileSpec AFRelationship checksum review, Type3 Encoding CMap source spacing, and forced-OCR table structure assignment ordering without Python/models/external PDF tools. The accepted 2026-06-02 outline/page/xref/runtime/form/parser batch maps server upload error artifact roundtrips, XFA signature widget action review, outline named-destination security/thread context, ParentTree associated-file rows, fail-closed xref-stream filter errors, and hybrid linearized xref generation repair without Python/models/external PDF tools., and current catalog associated FileSpec AF rows with OutputIntent provenance, PieceInfo/checksum review metadata, outline target context, and XMP/Info fallback boundaries, and inline JPX ColorKey output-preview rows plus caption-bound forced-OCR table cellspan header grids, and Type0/Type3 source-space font grouping, exact-generation xref-stream dictionary helper recovery, marker-server completed-output pagination, outline destination page-label structure summaries, page annotation StructTree associated transition context, and /Prev hybrid xref-stream carrier recovery The accepted security/parser/image/form current-base batch maps AcroForm DSS action FileSpec attachment review, xref-stream compressed helper operand ownership, inline image color-space soft-mask output preview rows, and AcroForm widget XFA/action/appearance value boundaries without executing actions, rendering appearances, rasterizing unsupported JPX streams, running Python/models, or exposing payload bytes as visible WordPress text. The accepted font/runtime/output/xref/table/outline current-base batch maps CID width resource spacing for styled spans, marker server upload benchmark error artifacts, saved Markdown image PNG quality review, hybrid xref object-stream carrier generation ownership, OCR table span-grid benchmark quality gates, and outline destination Thread action target metadata without executing Python/models, live servers, PDF actions, raster renderers, or external PDF tools. The accepted 10-handoff markerPDF current-base batch maps rotated OCR header captions, xref-stream Prev/Index width repair, DSS permission action-chain review, marker server upload pagination, predefined UCS2 vertical Type0 CIDSet spacing, AcroForm widget appearance export values, XMP name-tree associated schema metadata, JPEG2000 color-space soft-mask output previews, annotation StructTree OBJR fallback associations, and inline JPX/CMap boundary repair without executing Python/models, live servers, PDF actions, raster renderers, or external PDF tools. The accepted output table-image artifact current-base slice maps Markdown table image artifact accounting for saved, embedded, missing, and unreferenced PNG artifacts without changing mapped-denominator coverage or executing Streamlit/PDFium/Python/model tooling. The accepted 2026-06-02 seven-handoff batch maps metadata associated PieceInfo OutputIntent review, public-key DSS permission boundaries, marker runtime server config artifact review, hybrid xref free-entry ownership, parser name-array/comment escape boundaries, DeviceN ICC soft-mask transfer previews, and AcroForm signature widget lock resources without Python/models/external PDF tools. The accepted 2026-06-02 malformed-CMap current-wave slice maps rejection of malformed ToUnicode CMap Filter dictionary operands before CMap decoding, preserving safe Identity-H fallback text and review metadata without Python/models/external PDF tools. The accepted 2026-06-03 filter/parser/metadata current-base batch maps CCITTFax and DCTDecode image-filter boundaries, malformed CMap fallback, object-stream skipped header indexes, CMap source-width defaults, Form-resource Image XObject exclusion, outline/StructElem metadata, and page resource inheritance without Python/models/external PDF tools. The accepted 2026-06-03 supplied layout/runtime current-base batch maps selected-page supplied layout/order artifact alignment and convert.py::main runtime preflight planning without Python/models/external PDF tools. The accepted 2026-06-03 geometry/font/type3/link current-base batch maps named-bbox table geometry normalization, quote-operator styled-span advance geometry, Type3 CharProc fallback-payload exclusion, and rotated/UserUnit link rectangle promotion without Python/models/external PDF tools.",
        "mapped": 765,
        "mappedBenchmarkPairs": 2,
        "mappedBenchmarkSurrogatePairs": 4,
        "mappedBenchmarkSuppliedDocumentExcerpts": 4,
        "source": "Shallow upstream clone at .upstream-cache/markerpdf plus git ls-tree inventory of all 78 tracked upstream paths, with targeted reads of 78 workflow/benchmark/example/source/runtime/support paths: README.md, .github/workflows/tests.yml, benchmarks/overall.py, marker/benchmark/scoring.py, marker/benchmark/table.py, scripts/verify_benchmark_scores.py, marker/pdf/extract_text.py, marker/pdf/utils.py, marker/pdf/images.py, marker/postprocessors/markdown.py, marker/cleaners/*.py, marker/layout/layout.py, marker/layout/order.py, marker/ocr/heuristics.py, marker/ocr/utils.py, marker/ocr/lang.py, marker/ocr/tesseract.py, marker/ocr/detection.py, marker/ocr/recognition.py, marker/images/save.py, marker/images/extract.py, marker/schema/block.py, marker/schema/bbox.py, marker/schema/merged.py, marker/schema/page.py, marker/debug/data.py, marker/debug/render.py, marker/equations/equations.py, marker/equations/inference.py, marker/tables/table.py, marker/tables/utils.py, marker/output.py, marker/settings.py, marker/models.py, marker/utils.py, marker/logger.py, marker/convert.py, marker_app.py, run_marker_app.py, marker_server.py, top-level convert.py, top-level convert_single.py, top-level chunk_convert.py, top-level chunk_convert.sh, and data/examples/*/*.md paths. The native inventory now maps the benchmark file enumeration/reference-loading/method-loop/md_out/report aggregation shape in benchmarks/overall.py for supplied conversion text, marker/pdf/extract_text.py::naive_get_text/get_length_of_text text-length preflight at a native content-stream boundary, marker/convert.py::convert_single_pdf early language/filetype/page-metadata/lowres-image planning at a supplied-page boundary, marker/layout/layout.py::get_batch_size and supplied surya_layout assignment, marker/debug/data.py::dump_bbox_debug_data bbox JSON export plus draw_page_debug_images/draw_layout_page_debug_images artifact planning, marker/debug/render.py::render_on_image overlay operation planning, marker/tables/table.py::get_table_boxes for table crop planning, marker/equations/inference.py::get_batch_size/get_latex_batched Texify batch planning and generated max-token sentinel filtering over supplied outputs, marker/models.py model setup/load_all_models planning plus PYTORCH_ENABLE_MPS_FALLBACK behavior, marker/utils.py flush_cuda_memory CUDA-only empty-cache behavior, marker_app.py::img_to_html/markdown_insert_images image preview embedding plus marker_app.py::open_pdf/page_count/get_page_image PDF upload preview page counting and render planning, marker.logger.py::configure_logging logger/warning suppression planning, run_marker_app.py::run Streamlit command/environment planning, marker_app.py import-time environment setup, marker_server.py CommonParams normalization/upload validation/local response/remote polling boundaries, top-level convert.py batch task planning/metadata_file loading/process_single_pdf semantics for supplied converters, top-level convert_single.py language parsing/option handoff/save semantics for supplied single-document converters, chunk_convert.py/chunk_convert.sh environment validation plus per-device Marker job fanout planning, native PDF TJ array numeric positioning adjustments before same-line Tm gap decisions, native non-identity Tm horizontal-scale positioning before same-line Tm gap decisions, native ASCIIHexDecode, ASCII85Decode, and RunLengthDecode stream-filter decoding before text-token parsing, native indirect /Filter plus benign /DecodeParms resolution before stream decoding, and native FlateDecode Predictor 2 and PNG Predictor 10-15 DecodeParms reconstruction before text-token parsing. The locked tabled-pdf 0.1.4 source tarball was downloaded to /tmp/markerpdf-tabled-src for targeted dependency inspection only: tabled_pdf-0.1.4.tar.gz, 28,889 bytes, sha256 23af02dd04f64fcbda3c5fe67fbee740ab7cf12277e3fd949c40000c5b63a30d, with tabled/inference/detection.py::merge_boxes and merge_tables mapped for Marker's adjacent table-box merge boundary plus tabled/inference/recognition.py::{get_cells,recognize_tables}, tabled/assignment.py::{assign_rows_columns,merge_multiline_rows,handle_rowcol_spans}, tabled/heuristics/cells.py::{cluster_coords,find_column_separators,assign_cells_to_columns}, and tabled/formats/markdown.py::markdown_format mapped for supplied table-recognition handoff semantics. The CI Drive archive from .github/workflows/tests.yml was header-probed and downloaded to /tmp for inspection: benchmark_data_short.zip, 6,212,657 bytes, sha256 c7511a4f5055e949a7a7c293be5541942433059d7841965f056d7f9b441a41ad, containing benchmark_data/pdfs/{multicolcnn.pdf,switch_trans.pdf} and benchmark_data/references/{multicolcnn.md,switch_trans.md}. The Surya language-code and recognition-tokenizer boundary was cross-checked against the locked surya-ocr 0.6.13 wheel hash from poetry.lock (94 code entries, TOTAL_TOKENS 65536, TOKEN_OFFSET 3). The mapped surrogate fixtures sample README-linked committed output pairs for multicolcnn.pdf, switch_trans.pdf, thinkpython.pdf, and thinkos.pdf from data/examples/marker/*.md and data/examples/nougat/*.md; the mapped external CI fixture records hash-identified excerpts from the two benchmark_data PDF/reference pairs; the fuller supplied multicolcnn, switch_trans introduction, and switch_trans Contents table fixtures record pdftext/layout/order/table dictionaries for document-level native callbacks without live model downloads; the supplied Formula fixture records Formula layout metadata and a Texify-style equation result dictionary through SuppliedDocumentConverter. The isolated literal-escape slice maps PDF literal-string escape decoding in Tj/TJ operands at the native content-stream boundary, the isolated indirect filter slice maps indirect /Filter names/arrays plus null or Predictor 1 /DecodeParms handling before WordPress paragraph rendering, the isolated ASCII85 slice maps /ASCII85Decode and /A85 stream decoding including z zero groups, partial groups, and stacked ASCII85-to-Flate decoding before native text parsing, the isolated RunLength slice maps /RunLengthDecode and /RL literal/repeated runs plus EOD handling before native text parsing, and the isolated Flate predictor slice maps Predictor 2 plus PNG row filters 0-4 for Predictor 10-15 before native text parsing. The isolated PDF name escape slice maps PDF name #XX decoding for stream filter names and font resource names before native stream decoding and ToUnicode text lookup, including /Fl#61teDecode and /F#31 coverage for WordPress paragraph extraction. The isolated ToUnicode bfrange-array slice maps explicit destination arrays in beginbfrange CMap blocks, so encoded glyph IDs can emit non-sequential Unicode strings such as Import Blocks before Gutenberg paragraph rendering. The isolated ToUnicode usecmap slice maps named base CMap inheritance before local CMap overrides, so derived ToUnicode maps can emit inherited and locally defined glyph text before Gutenberg paragraph rendering. The isolated PDF literal UTF-16 BOM slice maps UTF-16BE and UTF-16LE BOM decoding for literal-string Tj operands after PDF literal escape handling, so UTF-16 PDF string payloads render as WordPress paragraphs instead of raw BOM bytes. The isolated ToUnicode codespacerange fallback slice maps CMap source-code width selection before unmapped CID fallback, so unrelated one-byte mappings do not split two-byte codes before WordPress paragraph extraction. The isolated simple-font WinAnsiEncoding slice maps high-bit punctuation bytes such as curly quotes and en dashes before WordPress paragraph extraction when no ToUnicode CMap is present. The isolated page /Contents slice maps catalog page-tree order, array-valued /Contents merging, page-level naive_get_text boundaries, and exclusion of unreferenced streams before WordPress paragraph extraction. The isolated Type0 Identity-H/V font CMap slice maps direct font Encoding source widths when no ToUnicode CMap is present, so two-byte CIDs in hex, literal, and TJ operands tokenize before fallback Unicode paragraph extraction instead of rendering raw NUL bytes. The isolated Form XObject invocation slice maps page-resource /Subtype /Form XObjects only when the current page content stream invokes their resource name with Do, preserving execution order while continuing to exclude unreferenced form streams before WordPress paragraph extraction. The isolated nested Form XObject resource-boundary slice maps per-form /Resources font dictionaries through recursive Form XObject invocation, aliasing local font names so page /F1, parent-form /F1, and child-form /F1 resolve independently before WordPress paragraph extraction. The isolated page-resource inheritance and PDF object-stream/xref slices map inherited page-tree /Resources font dictionaries, PDF 1.5 /ObjStm member expansion, xref-stream type-2 compressed-object membership, stale unlisted compressed object exclusion, and font resource lookup from decoded object bodies before WordPress paragraph extraction. The isolated pdftext dictionary-core slice maps real pdftext dictionary validation, null font-name handling, span char offsets/chars/rotation preservation, and selected-range pdftext_options metadata for supplied WordPress imports. The isolated LZWDecode stream-filter slice maps LZW-compressed PDF content stream decoding before native text-token parsing, including clear/end codes, variable-width code growth, and dictionary reset behavior for WordPress paragraph imports. The isolated native PDF outline/Info metadata slice maps catalog /Outlines traversal and trailer /Info string decoding into upstream-shaped pdf_toc/document_info metadata before WordPress bookmark import. The isolated native PDF named-destination outline slice maps /Outlines entries through direct destination arrays, /A /GoTo actions, catalog /Names /Dests name trees with Kids, legacy /Dests dictionaries, PDF name escapes, UTF-16BE title strings, and stale destination exclusion before WordPress bookmark import. The isolated PDF link annotation URI slice maps catalog-ordered page /Annots entries, /Subtype /Link URI actions, safe URI filtering, bbox overlap against supplied pdftext spans, and Markdown link emission before WordPress paragraph rendering. The isolated DCTDecode filter-boundary slice maps /DCTDecode and /DCT as image-only stream filters skipped before native text tokenization, preventing JPEG bytes containing PDF-looking text operators from leaking into WordPress paragraphs. The isolated CCITTFaxDecode filter-boundary slice maps /CCITTFaxDecode and /CCF as image-only stream filters skipped before native text tokenization, including direct and indirect DecodeParms cases, so fax/scanner bytes do not leak into WordPress paragraphs. The isolated catalog /PageLabels slice maps page label number-tree extraction, including /Kids traversal, direct and indirect /Nums dictionaries, decimal/roman/alphabetic formatting, /P prefixes, /St starts, and one-based fallback labels aligned to catalog page /Contents text extraction. The isolated catalog XMP metadata slice maps catalog /Metadata stream decoding, XML entity cleanup, dc:title/dc:creator/dc:description/dc:subject extraction, xmp:CreateDate/ModifyDate fields, source tagging, and trailer /Info fallback before WordPress document review metadata. The isolated CIDFont width metrics slice maps descendant-font /W array and /DW default width parsing into native text-advance decisions, preserving tight CID glyph clusters while adding spaces for narrow advances before WordPress paragraph rendering. The isolated inline-image boundary slice maps page content BI/ID/EI image data skipping so inline raster bytes that contain PDF-looking text operators cannot leak into WordPress paragraph extraction. The isolated CIDSet subset glyph-width slice maps compressed /CIDSet stream decoding through descendant CIDFont descriptors and high-order-bit CID membership into default /DW 1000 advance decisions before WordPress paragraph rendering. The latest accepted markerPDF batch adds PdfAcroFormExtractor native catalog /AcroForm traversal for field flags/default appearance review metadata, PdfImageRenderer DCTDecode CMYK Adobe APP14 transform planning for WordPress RGB preview metadata, and LZWDecode DecodeParms EarlyChange/Predictor boundaries in PdfTextExtractor. The latest accepted markerPDF batch adds stream filter-chain DecodeParms-array handling in PdfTextExtractor, PDF/A OutputIntent profile metadata in PdfMetadataExtractor, and PdfOutlineExtractor remote GoTo action extraction through getRemoteGoToActions(). The latest accepted markerPDF batch adds JPXDecode/JBIG2Decode image-filter boundaries in PdfTextExtractor so raster payload bytes are skipped before native text-token parsing. The latest accepted markerPDF batch adds generation-aware hybrid xref/free entry handling plus encrypted-PDF preflight in PdfTextExtractor and DocMDP signature permission metadata in PdfAcroFormExtractor. The latest accepted markerPDF batch adds CIDFont vertical /WMode /DW2 /W2 text grouping, page rotation/UserUnit/box preview geometry, and StructTreeRoot MCID reading-order extraction, The latest accepted markerPDF batch adds marked-content /ActualText and /Alt replacement, optional content group visibility, and Type3 /CharProcs d0/d1 width handling. The latest accepted markerPDF batch adds trailer /ID fingerprint metadata, catalog language/viewer preference review metadata, and catalog OpenAction URI/Launch/GoTo/GoToR safety-review extraction without executing PDF actions. The latest accepted markerPDF batch adds length-bounded ASCIIHex/RunLength stream payload handling, destination Fit/XYZ page-view metadata, XFA packet review metadata, and EmbeddedFiles name-tree attachment metadata without Python or external PDF tools. The latest accepted markerPDF batch adds native PDF FontDescriptor /Flags styled-span extraction that carries /FontName, /Flags, and /FontWeight into upstream-style font names for WordPress bold/italic cleanup without pdftext or pypdfium. The latest accepted markerPDF batch adds page /Dur display duration, /Trans transition dictionaries, and page /AA open/close action review metadata without executing PDF actions. The latest accepted markerPDF batch adds AcroForm /CO calculation order and field/widget /AA JavaScript action review metadata for keystroke, format, validation, and calculation triggers without executing PDF JavaScript. The latest accepted markerPDF batch adds PDF Portfolio /Collection metadata, Filespec /CI collection item dictionaries, and catalog/Filespec /PieceInfo review metadata for EmbeddedFiles without external PDF tools. The latest accepted markerPDF batch adds AcroForm current/default value-state metadata from field /V, /DV, /I, /Opt, and widget /AS appearance states without executing form actions. The isolated ToUnicode CMap comment slice maps PDF/PostScript % line-comment stripping outside literal and hex tokens before CMapName/usecmap/codespace/bfchar/bfrange parsing, preventing commented fake glyph mappings from overriding WordPress paragraph text. The isolated indirect name-tree destination slice maps object-table resolution for /Names /Dests string keys plus indirect destination dictionaries before TOC rows, page-view metadata, and catalog OpenAction review output. The isolated annotation geometry slice maps /Line, /InkList, /Vertices, /RD, and /CL annotation geometry into review metadata, including derived bounding boxes, shape rectangles, centers, radii, paths, and line endings without rendering appearances or executing actions. The latest accepted markerPDF batch adds ICCBased image profile parsing, alternate/range metadata, direct/indirect soft-mask metadata, matte unblend review flags, and RGB preview intent in PdfImageRenderer without executing pypdfium/PIL. The latest accepted markerPDF batch adds Standard security-handler permission metadata from trailer or xref-stream /Encrypt dictionaries, including permission flags, crypt filters, EncryptMetadata, hashed /Perms, and fail-closed review-only content extraction without exposing raw owner/user keys. The latest accepted markerPDF batch adds native Tr text rendering mode tracking through text extraction paths, suppressing invisible modes 3 and 7 while preserving visible clipping modes 4-6 and graphics-state restore semantics. The latest accepted markerPDF batch adds embedded-file /Params /CheckSum byte-string normalization, current payload MD5 computation, checksum algorithm labeling, and match/mismatch review metadata without dropping attachments. The latest accepted markerPDF batch adds raw-preserving UTC normalization for timezone-bearing XMP ISO-8601 and PDF Info D: metadata dates while leaving timezone-free dates raw-only., and The isolated XFA/XDP stream slice maps BOM-prefixed UTF-16 /AcroForm /XFA stream XML decoding, xdp:xdp packet-name extraction, source-encoding review metadata, and dynamic XFA data exclusion before WordPress form review rendering., and The isolated stream-length recovery slice maps native stream payload extraction when direct or indirect /Length values are stale, missing, or land before a valid endstream terminator while preserving fail-closed unsupported filter behavior., and The isolated trailer-root generation slice maps incremental PDF catalog recovery from the latest startxref trailer/xref-stream /Root with /Prev and hybrid /XRefStm fallback, preventing stale catalog order from winning., and The isolated standard-font encoding slice maps StandardEncoding, MacRomanEncoding through /BaseEncoding, /Differences merging, and implicit SymbolEncoding from /BaseFont /Symbol before native PDF text extraction., and The isolated PDFDocEncoding metadata slice maps non-BOM PDF text-string decoding for trailer /Info metadata, preserving UTF-16 BOM handling and preventing encoded metadata bytes from reaching WordPress review text unnormalized. The isolated page-tree cycle/resource guard slice maps duplicate reachable /Page leaf de-duplication, cyclic /Kids traversal protection, and inherited /Resources lineage stopping at non-/Pages parents before WordPress text extraction and MarkerApp preview inventory. The isolated stream-filter name-array slice maps direct /Filter arrays whose filter-name entries are indirect objects, ignores null entries, and preserves fail-closed cyclic filter resolution before native text extraction. The isolated XMP/Info encoding fallback slice maps BOM, declared XML encodings, bounded undeclared Windows-1252/ISO-8859-1 fallback, and trailer Info field completion before WordPress metadata review. The isolated annotation rotation slice maps text-markup QuadPoints through inherited MediaBox/CropBox and /Rotate 90/270 into marker/pdftext display coordinates before applying review spans. The isolated CIDFont descriptor-width slice maps descendant CIDFonts with FontDescriptor but no /DW, /W, or /CIDSet to default /DW 1000 before native text-advance grouping. The isolated xref-stream /Prev generation repair slice maps exact byte-offset direct-object selection when current xref-stream rows omit generation fields, preventing stale previous-generation page text from winning before WordPress paragraph extraction. The isolated metadata xref-stream trailer slice maps latest startxref xref-stream dictionaries as current trailer sources for /Root, /Info, and /ID before stale textual trailer fallbacks, and The isolated page-box indirect-operand slice maps PDF rectangle arrays whose numeric coordinates are indirect references before marker_app-style preview sizing, including crop/trim boxes, inherited rotation, and page-local UserUnit, and The isolated Form XObject matrix/BBox slice maps PDF graphics-state cm, form /Matrix composition, and form-local /BBox clipping before native text extraction, matching the upstream PDFium/pdftext boundary used by markerPDF, and The isolated supplied merged-table boundary slice maps markerPDF table detection merge behavior before equation/image arbitration, preserving one table Markdown block and excluding duplicate supplied Formula/Picture insertions The isolated image decode/stencil slice maps marker/pdf/images.py image rendering boundaries for base image /Decode arrays plus /ImageMask stencil decode opacity before RGB preview metadata, without executing pypdfium/PIL or external PDF tools. The isolated annotation appearance slice maps selected page annotation /AP /N Form XObject resources, Matrix, and BBox clipping into native text import boundaries, preserving nested appearance resources without rendering or executing PDF actions. The isolated AcroForm calculation/signature slice maps catalog /AcroForm /SigFlags, /CO calculation order, field/widget calculate actions as review metadata, signature dictionaries, and signed /Lock field state without executing JavaScript, validation, or signing. The isolated Type0 /Encoding CMap slice maps mixed one-byte/two-byte code-space boundaries and CID maps into fallback text-source segmentation for descendant CIDFont width grouping when /ToUnicode is absent. The isolated AcroForm XFA signature widget-state slice maps page-referenced widget annotation /F flags, /AS appearance-state metadata, annotation order, signature dictionary review metadata, and XDP packet metadata without executing form actions or signing. The isolated security preflight slice maps encryption permission metadata and AcroForm signature ByteRange review into a native import decision without decryption, signing, signature validation, or external PDF tools. The isolated Filespec PieceInfo private-stream slice maps /PieceInfo /Private stream metadata and excludes those payloads from fallback text extraction. The isolated MarkInfo/page-associated file slice maps catalog /MarkInfo flags and page-associated /AF Filespec review metadata, including Source/Alternative relationships, MIME type, size, and SHA-256 hashes while keeping embedded payloads out of visible text. The isolated object-stream/xref free-entry conflict slice maps current hybrid xref table free rows taking precedence over stale companion /XRefStm type-2 object-stream rows. The isolated AcroForm default-resource slice maps /AcroForm /DR /Font review metadata and /DA font-resource resolution without executing form actions or rendering appearances. The isolated OpenAction next-chain slice maps catalog /OpenAction action dictionaries through the bounded /Next walker so chained URI, Launch, and GoTo followups are review-only metadata. The isolated associated-file PieceInfo checksum slice maps catalog /AF Filespec /PieceInfo /Private stream /Params /CheckSum boundaries, including declared size/length, decoded private stream checksum calculation, match/mismatch state, MIME type, timestamps, and review-only payload exclusion before WordPress paragraph rendering. The isolated page-cm text-operator slice maps ordinary page content graphics-state cm transforms and q/Q restore behavior before text-position grouping, preventing transformed text from being split into stale WordPress paragraphs. The isolated marker-app preview crop-boundary slice maps CropBox-to-MediaBox clipping, out-of-media zero-area preview detection, rotation axis swaps, and UserUnit scaled preview sizing before WordPress preview planning. The isolated predefined vertical CMap slice maps Type0 /Encoding CMap names ending in -V or -H into writing-mode detection before ToUnicode text grouping, preserving vertical CIDFont /W2, /DW2, and /CIDSet width boundaries without Python/models/external PDF tools. The isolated StructTree thread precedence slice maps tagged PDF StructTreeRoot RoleMap MCID reading order as authoritative over catalog /Threads bead fallback when marked-content segments are recoverable, while preserving Threads fallback for untagged/no-match pages before WordPress paragraph rendering. The isolated indirect Type0 Encoding slice maps object-valued /Encoding names such as indirect /Identity-H into existing CIDSet/default-width and FontDescriptor flag paths before native WordPress text extraction. The isolated xref-stream object-stream generation repair slice maps omitted type-2 compressed-object member indexes to object-stream header object numbers while keeping explicit indexes strict, excluding stale direct generations and decoy object-stream members before WordPress text extraction. The isolated token stream owner slice maps direct stream owner lookup through token-aware object body ranges, so object-like text inside PDF strings before a stream payload cannot reassign private stream ownership or leak PieceInfo private payload text into WordPress paragraphs. The isolated page UserUnit markup slice maps text-markup QuadPoints through inherited page boxes, rotation, and page-local UserUnit before applying review metadata to supplied marker/pdftext display-space spans, preventing unscaled page-space decoy annotations. The isolated AcroForm appearance/value/action slice maps widget /AP /N state dictionaries and direct normal appearance streams into review metadata, including BBox, Matrix, filters, decoded hashes, and resource names, while field /V remains authoritative and /A /AA actions stay non-executing. The isolated supplied OCR prediction table slice maps upstream OCR prediction object shapes with text_lines/lines through forced table recognition, proving supplied OCR cell text reaches Markdown/WordPress table rendering while stale pdftext table lines are excluded without live Python/model/external PDF tools. The isolated Indexed ICCBased JBIG2 soft-mask image slice maps upstream render_image_rgb preview boundaries by recording palette lookup bytes, ICC profile metadata, JBIG2Globals presence, and soft-mask decode opacity before any future raster backend executes. The isolated parser stream-filter object-boundary slice maps current xref-selected direct object bodies before fallback stream decoding, proving stale/free filtered objects and nested stream-looking tokens inside payloads do not leak into visible text. The isolated PieceInfo metadata boundary slice maps catalog private PieceInfo Metadata and OutputIntents as review-only dictionaries, proving nested private XMP/PDF-A data is not promoted into document-level metadata roots while visible text stays clean. The isolated outline name-tree transition/action slice maps outline destinations and GoTo actions that resolve through /Names /Dests to target page presentation/action review metadata, proving page actions remain non-executing and hidden from visible text. The isolated xref-stream type-2 object-stream base slice maps malformed compressed-object rows that target the direct /ObjStm base itself: the current direct object-stream base remains authoritative while explicit type-2 member indexes still select compressed page objects, excluding decoy members and orphan fallback streams before WordPress text extraction. The isolated parser stream dictionary escape-boundary slice maps escaped top-level /Fil#74er and /Len#67th keys while ignoring fake /Filter, /Length, and /DecodeParms names inside literal strings, comments, arrays, and nested dictionaries before WordPress paragraph rendering. The isolated parser xref offset-owner boundary maps stream-owned fake xref table payload rejection so stale page trees cannot override current owner-boundary text before WordPress paragraph extraction. The isolated Indexed image Decode soft-mask boundary maps PDF Indexed color-space default Decode [0 hival], decoded-index hival clipping, and soft-mask alpha preview before RGB WordPress image review without raster execution. The isolated page font resource ToUnicode/width boundary maps nearest page /Resources font lookup so ancestor page-tree ToUnicode maps and CID widths do not leak into leaf pages with their own resources before WordPress text import. The isolated Type0 indirect CIDFont width boundary maps descendant /W arrays stored in indirect objects through Encoding CMap CIDs before WordPress text-gap grouping. The isolated object-stream filter owner boundary maps /Type /ObjStm carrier streams as compressed-object containers, excluding their decoded payloads from fallback visible text while preserving direct filtered stream extraction. Native parser coverage now includes iterative object-stream expansion for indirect filter-chain operands recovered from compressed helper object streams. Native parser coverage now includes direct stream-length object-owner scanning for embedded fake xref-stream object headers. Native supplied-document conversion now preserves forced-OCR merged-cell geometry metadata for WordPress table review. The accepted five-slice current-base batch maps page /StructParents ParentTree reading order, associated-file XMP/PDF-A provenance review, inline image filter-array abbreviation/null-entry boundaries, Type0 ToUnicode surrogate/CID width grouping, and hybrid xref /Prev underdeclared trailer-size repair without Python/models/external PDF tools. The accepted ColorKey image mask rebase maps raw PDF /Mask component ranges before /Decode-adjusted RGB preview values and records transparent/opaque sample review metadata without executing pypdfium, PIL, Python models, or external PDF tools. The accepted 2026-06-02 18:29Z current-base batch maps runtime benchmark API callbacks, outline destination action context, xref repair boundaries, security AcroForm permission actions, XFA widget current values, OCR polygon table geometry, catalog name-tree metadata bounds, and named color-space soft-mask image preview metadata without Python/models/external PDF tools. The accepted 2026-06-02 18:43Z current-base batch maps runtime conversion boundary planning, platform/embedded AcroForm action review, CID encoding widths, inline ImageMask preview rows, associated related-file metadata, outline name-tree Limits, annotation reply threads, xref-stream owner boundaries, DSS multi-signature matching, duplicate object-stream zero-width guards, and table grid-border assignment review without Python/models/external PDF tools.",
        "inventory": {
            "repositoryFiles": 78,
            "targetedLaneRelevantPaths": 78,
            "pythonFiles": 47,
            "markerPythonModules": 39,
            "pythonTestFiles": 0,
            "workflowFiles": 3,
            "ciBenchmarkWorkflows": 1,
            "benchmarkRunnerScripts": 1,
            "benchmarkRunnerMainLoopBehaviors": 5,
            "mappedBenchmarkRunnerMainLoopBehaviors": 5,
            "benchmarkRunnerReportAggregations": 1,
            "mappedBenchmarkRunnerReportAggregations": 1,
            "benchmarkScoringModules": 2,
            "benchmarkScoringFunctions": 6,
            "mappedBenchmarkScoringFunctions": 6,
            "scoreVerifierScripts": 1,
            "mappedMarkerBenchmarkUpstreamCiEvidenceCurrentBaseBehaviors": 1,
            "cleanerModules": 7,
            "cleanerFunctions": 17,
            "mappedCleanerFunctions": 17,
            "markdownPostprocessorFunctions": 8,
            "mappedMarkdownPostprocessorFunctions": 8,
            "layoutOrderingModules": 1,
            "layoutAnnotationModules": 1,
            "layoutAnnotationFunctions": 3,
            "mappedLayoutAnnotationFunctions": 3,
            "pdfUtilityModules": 1,
            "pdfUtilityFunctions": 3,
            "mappedPdfUtilityFunctions": 3,
            "pdfTextExtractionModules": 1,
            "pdfTextExtractionFunctions": 4,
            "mappedPdfTextExtractionFunctions": 4,
            "pdfTextStreamFilterBehaviors": 1,
            "mappedPdfTextStreamFilterBehaviors": 3,
            "pdfTextAdvancePositioningBehaviors": 1,
            "mappedPdfTextAdvancePositioningBehaviors": 1,
            "pdfTextStateSpacingAdvanceBehaviors": 1,
            "mappedPdfTextStateSpacingAdvanceBehaviors": 1,
            "pdfTextGraphicsStateScopeBehaviors": 1,
            "mappedPdfTextGraphicsStateScopeBehaviors": 1,
            "pdfTextTJPositioningBehaviors": 1,
            "mappedPdfTextTJPositioningBehaviors": 1,
            "pdfTextMatrixHorizontalScaleBehaviors": 1,
            "mappedPdfTextMatrixHorizontalScaleBehaviors": 1,
            "mappedLayoutOrderingFunctions": 3,
            "ocrHeuristicModules": 2,
            "ocrHeuristicFunctions": 5,
            "mappedOcrHeuristicFunctions": 5,
            "ocrLanguageModules": 2,
            "ocrLanguageFunctions": 3,
            "mappedOcrLanguageFunctions": 3,
            "ocrDetectionModules": 1,
            "ocrDetectionFunctions": 2,
            "mappedOcrDetectionFunctions": 2,
            "ocrRecognitionModules": 1,
            "ocrRecognitionFunctions": 6,
            "mappedOcrRecognitionFunctions": 3,
            "suryaLanguageCodeEntries": 94,
            "suryaTokenizerLanguageMapEntries": 94,
            "tesseractLanguageCodeEntries": 93,
            "imageInsertionArtifacts": 4,
            "imageInsertionFunctions": 5,
            "mappedImageInsertionFunctions": 5,
            "pdfImageRenderingModules": 1,
            "pdfImageRenderingFunctions": 2,
            "mappedPdfImageRenderingFunctions": 2,
            "bboxGeometryModules": 1,
            "bboxGeometryFunctions": 8,
            "mappedBboxGeometryFunctions": 8,
            "bboxElementBehaviors": 8,
            "mappedBboxElementBehaviors": 8,
            "equationArtifacts": 2,
            "equationFunctions": 7,
            "mappedEquationFunctions": 7,
            "mappedEquationInferenceBatchBehaviors": 2,
            "tableFormattingModules": 1,
            "tableFormattingFunctions": 3,
            "mappedTableFormattingFunctions": 2,
            "lockedTabledPdfSourceArchives": 1,
            "tabledPdfDetectionFunctions": 3,
            "mappedTabledPdfDetectionFunctions": 2,
            "tabledPdfRecognitionFunctions": 2,
            "mappedTabledPdfRecognitionFunctions": 2,
            "tabledPdfAssignmentFunctions": 8,
            "mappedTabledPdfAssignmentFunctions": 8,
            "tabledPdfHeuristicFunctions": 3,
            "mappedTabledPdfHeuristicFunctions": 3,
            "tabledPdfMarkdownFormattingFunctions": 6,
            "mappedTabledPdfMarkdownFormattingFunctions": 6,
            "tableUtilityModules": 1,
            "tableUtilityFunctions": 3,
            "mappedTableUtilityFunctions": 3,
            "outputModules": 1,
            "outputFunctions": 4,
            "mappedOutputFunctions": 4,
            "markerAppModules": 1,
            "markerAppImageEmbeddingFunctions": 2,
            "mappedMarkerAppImageEmbeddingFunctions": 2,
            "markerAppPdfPreviewFunctions": 3,
            "mappedMarkerAppPdfPreviewFunctions": 3,
            "markerServerModules": 1,
            "markerServerEndpointBehaviors": 5,
            "mappedMarkerServerEndpointBehaviors": 5,
            "batchConversionScripts": 1,
            "mappedBatchConversionBehaviors": 5,
            "singleDocumentConversionScripts": 1,
            "mappedSingleDocumentConversionBehaviors": 6,
            "chunkConversionScripts": 2,
            "mappedChunkConversionBehaviors": 4,
            "settingsModules": 1,
            "settingsFieldsAndComputedProperties": 50,
            "mappedSettingsBehaviors": 8,
            "modelLoaderModules": 1,
            "modelLoaderFunctions": 7,
            "mappedModelLoaderFunctions": 7,
            "cudaUtilityModules": 1,
            "cudaUtilityFunctions": 1,
            "mappedCudaUtilityFunctions": 1,
            "readmeBenchmarkDocuments": 6,
            "committedMarkerMarkdownExamples": 4,
            "committedNougatMarkdownExamples": 4,
            "ciBenchmarkArchiveZipFiles": 1,
            "ciBenchmarkArchivePdfFiles": 2,
            "ciBenchmarkArchiveReferenceFiles": 2,
            "mappedExternalBenchmarkPairs": 2,
            "mappedScoreVerifierFunctions": 2,
            "convertPipelineModules": 1,
            "convertPipelineFinalizationFunctions": 1,
            "mappedConvertPipelineFinalizationFunctions": 1,
            "convertPipelineEarlyBoundaryBehaviors": 4,
            "mappedConvertPipelineEarlyBoundaryBehaviors": 4,
            "convertPipelineSuppliedDocumentBehaviors": 2,
            "mappedConvertPipelineSuppliedDocumentBehaviors": 5,
            "mappedSuppliedDocumentBenchmarkExcerpts": 4,
            "mappedSwitchTransformerTocTableSuppliedFixtures": 1,
            "mappedSwitchTransformerTable1SuppliedFixtures": 1,
            "mappedUnicodeMarkdownTableWidthBehaviors": 1,
            "mappedForcedOcrTableRoutingFixtures": 1,
            "mappedSuppliedEquationDocumentFixtures": 1,
            "schemaPageHelperFunctions": 6,
            "mappedSchemaPageHelperFunctions": 6,
            "schemaBlockStructureFunctions": 3,
            "mappedSchemaBlockStructureFunctions": 3,
            "schemaBlockFilterFunctions": 2,
            "mappedSchemaBlockFilterFunctions": 2,
            "schemaBlockEmptyLineCompactionBehaviors": 2,
            "mappedSchemaBlockEmptyLineCompactionBehaviors": 2,
            "debugModules": 2,
            "debugFunctions": 7,
            "mappedDebugDataFunctions": 3,
            "debugRenderFunctions": 3,
            "mappedDebugRenderFunctions": 1,
            "runtimeUtilityModules": 2,
            "runtimeUtilityFunctions": 2,
            "mappedRuntimeUtilityFunctions": 2,
            "markerAppImportEnvironmentVariables": 3,
            "pdfTextLiteralEscapeBehaviors": 1,
            "mappedPdfTextLiteralEscapeBehaviors": 1,
            "pdfTextIndirectFilterDecodeParmsBehaviors": 1,
            "mappedPdfTextIndirectFilterDecodeParmsBehaviors": 1,
            "pdfTextASCII85StreamFilterBehaviors": 1,
            "mappedPdfTextASCII85StreamFilterBehaviors": 1,
            "pdfTextRunLengthStreamFilterBehaviors": 1,
            "mappedPdfTextRunLengthStreamFilterBehaviors": 1,
            "pdfTextFlatePredictorBehaviors": 1,
            "mappedPdfTextFlatePredictorBehaviors": 1,
            "pdfNameEscapeBehaviors": 1,
            "mappedPdfNameEscapeBehaviors": 1,
            "pdfToUnicodeBfRangeArrayBehaviors": 1,
            "mappedPdfToUnicodeBfRangeArrayBehaviors": 1,
            "pdfToUnicodeUseCMapBehaviors": 1,
            "mappedPdfToUnicodeUseCMapBehaviors": 1,
            "pdfToUnicodeCodespaceFallbackBehaviors": 1,
            "mappedPdfToUnicodeCodespaceFallbackBehaviors": 1,
            "pdfTextLiteralUtf16BomBehaviors": 1,
            "mappedPdfTextLiteralUtf16BomBehaviors": 1,
            "pdfSimpleFontEncodingDifferencesBehaviors": 1,
            "mappedPdfSimpleFontEncodingDifferencesBehaviors": 1,
            "pdfSimpleFontWinAnsiEncodingBehaviors": 1,
            "mappedPdfSimpleFontWinAnsiEncodingBehaviors": 1,
            "pdfPageContentStreamBehaviors": 1,
            "mappedPdfPageContentStreamBehaviors": 2,
            "documentStructureBoundaryBehaviors": 1,
            "mappedDocumentStructureBoundaryBehaviors": 1,
            "pdfIdentityCMapFontEncodingBehaviors": 1,
            "mappedPdfIdentityCMapFontEncodingBehaviors": 1,
            "pdfFormXObjectInvocationBehaviors": 1,
            "mappedPdfFormXObjectInvocationBehaviors": 1,
            "pdfNestedFormXObjectResourceBoundaryBehaviors": 1,
            "mappedPdfNestedFormXObjectResourceBoundaryBehaviors": 1,
            "pdfPageResourceInheritanceBehaviors": 1,
            "mappedPdfPageResourceInheritanceBehaviors": 1,
            "pdfObjectStreamXrefParserBehaviors": 1,
            "mappedPdfObjectStreamXrefParserBehaviors": 1,
            "pdfToUnicodeSourceWidthFallbackBehaviors": 1,
            "mappedPdfToUnicodeSourceWidthFallbackBehaviors": 1,
            "pdfMalformedToUnicodeCMapFilterFallbackBehaviors": 1,
            "mappedPdfMalformedToUnicodeCMapFilterFallbackBehaviors": 1,
            "pdfImageXObjectBoundaryBehaviors": 1,
            "mappedPdfImageXObjectBoundaryBehaviors": 1,
            "pdfImageXObjectPlacementBoundaryCurrentBaseBehaviors": 2,
            "mappedPdfImageXObjectPlacementBoundaryCurrentBaseBehaviors": 2,
            "pdfTextDictionaryCoreBehaviors": 1,
            "mappedPdfTextDictionaryCoreBehaviors": 1,
            "pdfTextDictionarySortBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfTextDictionarySortBoundaryCurrentBaseBehaviors": 1,
            "pdfTextDictionaryBlankPageBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfTextDictionaryBlankPageBoundaryCurrentBaseBehaviors": 1,
            "pdfTextLZWStreamFilterBehaviors": 1,
            "mappedPdfTextLZWStreamFilterBehaviors": 1,
            "pdfOutlineInfoMetadataBehaviors": 1,
            "mappedPdfOutlineInfoMetadataBehaviors": 1,
            "pdfNamedDestinationOutlineBehaviors": 1,
            "mappedPdfNamedDestinationOutlineBehaviors": 1,
            "pdfLinkAnnotationUriBehaviors": 1,
            "mappedPdfLinkAnnotationUriBehaviors": 1,
            "pdfTextDCTDecodeFilterBehaviors": 1,
            "mappedPdfTextDCTDecodeFilterBehaviors": 1,
            "pdfTextCCITTFaxStreamFilterBoundaryBehaviors": 1,
            "mappedPdfTextCCITTFaxStreamFilterBoundaryBehaviors": 1,
            "pdfPageLabelsNumberTreeBehaviors": 1,
            "mappedPdfPageLabelsNumberTreeBehaviors": 1,
            "pdfXmpMetadataExtractionBehaviors": 1,
            "mappedPdfXmpMetadataExtractionBehaviors": 1,
            "pdfCidFontWidthMetricBehaviors": 1,
            "mappedPdfCidFontWidthMetricBehaviors": 1,
            "pdfTrueTypeEmbeddedGlyphOrderWidthBehaviors": 2,
            "mappedPdfTrueTypeEmbeddedGlyphOrderWidthBehaviors": 2,
            "pdfCidFontCidSetGlyphWidthBehaviors": 1,
            "mappedPdfCidFontCidSetGlyphWidthBehaviors": 1,
            "pdfInlineImagePayloadBoundaryBehaviors": 1,
            "mappedPdfInlineImagePayloadBoundaryBehaviors": 1,
            "pdfTextLZWDecodeParmsBoundaryBehaviors": 1,
            "mappedPdfTextLZWDecodeParmsBoundaryBehaviors": 1,
            "pdfDctDecodeCmykAdobeTransformBehaviors": 1,
            "mappedPdfDctDecodeCmykAdobeTransformBehaviors": 1,
            "pdfAcroFormFieldFlagsDefaultAppearanceBehaviors": 1,
            "mappedPdfAcroFormFieldFlagsDefaultAppearanceBehaviors": 1,
            "pdfTextFilterChainDecodeParmsArrayBehaviors": 1,
            "mappedPdfTextFilterChainDecodeParmsArrayBehaviors": 1,
            "pdfAOutputIntentMetadataBehaviors": 1,
            "mappedPdfAOutputIntentMetadataBehaviors": 1,
            "pdfRemoteGoToOutlineActionBehaviors": 1,
            "mappedPdfRemoteGoToOutlineActionBehaviors": 1,
            "pdfTextJPXJBIG2ImageFilterBoundaryBehaviors": 1,
            "mappedPdfTextJPXJBIG2ImageFilterBoundaryBehaviors": 1,
            "pdfHybridXrefPrevOverrideBehaviors": 1,
            "mappedPdfHybridXrefPrevOverrideBehaviors": 1,
            "pdfXrefFreeGenerationSuppressionBehaviors": 1,
            "mappedPdfXrefFreeGenerationSuppressionBehaviors": 1,
            "pdfEncryptedDocumentPreflightBehaviors": 1,
            "mappedPdfEncryptedDocumentPreflightBehaviors": 1,
            "pdfEncryptedPermissionCryptFilterPreflightCurrentBaseBehaviors": 1,
            "mappedPdfEncryptedPermissionCryptFilterPreflightCurrentBaseBehaviors": 1,
            "pdfQpdfEncryptedPermissionFixtureCurrentBaseBehaviors": 1,
            "mappedPdfQpdfEncryptedPermissionFixtureCurrentBaseBehaviors": 1,
            "pdfAcroFormSignatureDocMdpPermissionBehaviors": 1,
            "mappedPdfAcroFormSignatureDocMdpPermissionBehaviors": 1,
            "pdfCidFontVerticalMetricBehaviors": 1,
            "mappedPdfCidFontVerticalMetricBehaviors": 1,
            "pdfPageRotationUserUnitBoxGeometryBehaviors": 1,
            "mappedPdfPageRotationUserUnitBoxGeometryBehaviors": 1,
            "pdfStructTreeMcidReadingOrderBehaviors": 1,
            "mappedPdfStructTreeMcidReadingOrderBehaviors": 1,
            "pdfTaggedTableSectionOrderingCurrentBaseBehaviors": 1,
            "mappedPdfTaggedTableSectionOrderingCurrentBaseBehaviors": 1,
            "pdfTrailerIdFingerprintBehaviors": 1,
            "mappedPdfTrailerIdFingerprintBehaviors": 1,
            "pdfCatalogLanguageViewerPreferenceBehaviors": 1,
            "mappedPdfCatalogLanguageViewerPreferenceBehaviors": 1,
            "pdfCatalogOpenActionReviewBehaviors": 1,
            "mappedPdfCatalogOpenActionReviewBehaviors": 1,
            "pdfTextLengthBoundedStreamFilterBehaviors": 1,
            "mappedPdfTextLengthBoundedStreamFilterBehaviors": 1,
            "pdfDestinationViewMetadataBehaviors": 1,
            "mappedPdfDestinationViewMetadataBehaviors": 1,
            "pdfXfaPacketReviewBehaviors": 1,
            "mappedPdfXfaPacketReviewBehaviors": 1,
            "pdfEmbeddedFileNameTreeBehaviors": 1,
            "mappedPdfEmbeddedFileNameTreeBehaviors": 1,
            "pdfObjectStreamFreeEntryReuseGuardBehaviors": 1,
            "mappedPdfObjectStreamFreeEntryReuseGuardBehaviors": 1,
            "pdfAnnotationBorderPopupReviewBehaviors": 1,
            "mappedPdfAnnotationBorderPopupReviewBehaviors": 1,
            "pdfFontDescriptorFlagsStyledSpanBehaviors": 1,
            "mappedPdfFontDescriptorFlagsStyledSpanBehaviors": 1,
            "pdfPageTransitionActionMetadataBehaviors": 1,
            "mappedPdfPageTransitionActionMetadataBehaviors": 1,
            "pdfAcroFormCalculationActionBehaviors": 1,
            "mappedPdfAcroFormCalculationActionBehaviors": 1,
            "pdfPortfolioPieceInfoBehaviors": 1,
            "mappedPdfPortfolioPieceInfoBehaviors": 1,
            "pdfAcroFormCurrentValueStateBehaviors": 1,
            "mappedPdfAcroFormCurrentValueStateBehaviors": 1,
            "pdfToUnicodeCMapCommentBehaviors": 1,
            "mappedPdfToUnicodeCMapCommentBehaviors": 1,
            "pdfIndirectNameTreeDestinationBehaviors": 1,
            "mappedPdfIndirectNameTreeDestinationBehaviors": 1,
            "pdfAnnotationGeometryReviewBehaviors": 1,
            "mappedPdfAnnotationGeometryReviewBehaviors": 1,
            "nativeCoverage": [
                "PDF JavaScript action-chain /Next traversal follows review-only action graphs through URI, Launch, and JavaScript actions, blocks cycles and excessive depth, and records chain indexes without executing scripts.",
                "PDF startxref chain precedence follows the latest xref table/stream chain, /Prev, and hybrid /XRefStm before object-stream rebuild fallback so stale appended xrefs cannot override current page objects.",
                "PDF TJ array parsing treats % comments as lexical whitespace so hidden comment text, numeric adjustments, and delimiters cannot leak into WordPress paragraphs or close the text array early.",
                "PDF Sound and Movie annotations are extracted as review-only media dictionaries, including movie file/aspect/activation and sound sample/compression fields, without executing media, JavaScript, or leaking payload/appearance text.",
                "MarkerAppPreview preserves inherited indirect page boxes, ignores invalid page-local rotations that are not multiples of 90, and keeps /UserUnit page-local/defaulted when planning preview dimensions without pypdfium execution.",
                "PDF stream filters resolve indirect numeric DecodeParms values for Predictor, Columns, Colors, BitsPerComponent, and EarlyChange before Flate/LZW predictor reconstruction, with invalid indirect EarlyChange values failing closed."
            ],
            "0": "marker PDF UTF-16 XDP XFA stream packet metadata is represented by PdfAcroFormExtractor, including BOM-aware XML decoding, xdp:xdp packet-name extraction, field/data node review metadata, and dynamic XFA data exclusion.",
            "1": "marker PDF stale stream /Length recovery is represented by PdfTextExtractor, including direct/indirect length validation, bounded endstream fallback, CMap stream payload recovery, and unsupported-filter fail-closed behavior.",
            "2": "marker PDF latest trailer /Root catalog generation recovery is represented by PdfTextExtractor, including latest startxref trailer/xref-stream root selection, /Prev fallback, hybrid /XRefStm fallback, and stale catalog exclusion.",
            "3": "marker PDF StandardEncoding, MacRomanEncoding, and implicit SymbolEncoding behavior is represented by PdfTextExtractor, including BaseEncoding, Differences merging, and Symbol BaseFont fallback.",
            "4": "marker PDF PDFDocEncoding trailer /Info metadata decoding is represented by PdfMetadataExtractor, including bullet, ligature, smart quote, minus, per-mille, Lslash/lslash, Euro, and Latin-1 byte mapping with UTF-16 BOM handling preserved.",
            "pdfPageTreeCycleResourceGuardBehaviors": 1,
            "mappedPdfPageTreeCycleResourceGuardBehaviors": 1,
            "pdfStreamFilterIndirectNameArrayBehaviors": 1,
            "mappedPdfStreamFilterIndirectNameArrayBehaviors": 1,
            "pdfXmpInfoEncodingFallbackBehaviors": 1,
            "mappedPdfXmpInfoEncodingFallbackBehaviors": 1,
            "pdfRotatedMarkupQuadPointBehaviors": 1,
            "mappedPdfRotatedMarkupQuadPointBehaviors": 1,
            "pdfCidFontDescriptorDefaultWidthBehaviors": 1,
            "mappedPdfCidFontDescriptorDefaultWidthBehaviors": 1,
            "pdfAcroFormXfaSignatureWidgetStateBehaviors": 1,
            "mappedPdfAcroFormXfaSignatureWidgetStateBehaviors": 1,
            "pdfSecurityPreflightDecisionBehaviors": 1,
            "mappedPdfSecurityPreflightDecisionBehaviors": 1,
            "pdfFilespecPieceInfoPrivateStreamBehaviors": 1,
            "mappedPdfFilespecPieceInfoPrivateStreamBehaviors": 1,
            "pdfEmbeddedFileCollectionAssociatedChecksumBehaviors": 1,
            "mappedPdfEmbeddedFileCollectionAssociatedChecksumBehaviors": 1,
            "pdfCidFontDecimalWidthBehaviors": 1,
            "mappedPdfCidFontDecimalWidthBehaviors": 1,
            "pdfSoftMaskDecodeOpacityBehaviors": 1,
            "mappedPdfSoftMaskDecodeOpacityBehaviors": 1,
            "pdfPageLabelTransitionViewerPreferenceBehaviors": 1,
            "mappedPdfPageLabelTransitionViewerPreferenceBehaviors": 1,
            "pdfEncryptedMetadataPriorityBehaviors": 1,
            "mappedPdfEncryptedMetadataPriorityBehaviors": 1,
            "pdfRichMediaAnnotationActionBoundaryBehaviors": 1,
            "mappedPdfRichMediaAnnotationActionBoundaryBehaviors": 1,
            "pdfLinearizedIncrementalHintObjectBoundaryBehaviors": 1,
            "mappedPdfLinearizedIncrementalHintObjectBoundaryBehaviors": 1,
            "pdfMediaAnnotationPopupBoundaryBehaviors": 1,
            "mappedPdfMediaAnnotationPopupBoundaryBehaviors": 1,
            "pdfCMapSourceGlyphAdvanceBoundaryBehaviors": 1,
            "mappedPdfCMapSourceGlyphAdvanceBoundaryBehaviors": 1,
            "pdfXrefStreamInvalidExplicitOffsetBoundaryBehaviors": 1,
            "mappedPdfXrefStreamInvalidExplicitOffsetBoundaryBehaviors": 1,
            "pdfAssociatedFilePieceInfoChecksumBehaviors": 1,
            "mappedPdfAssociatedFilePieceInfoChecksumBehaviors": 1,
            "pdfPageGraphicsStateCmTextTransformBehaviors": 1,
            "mappedPdfPageGraphicsStateCmTextTransformBehaviors": 1,
            "markerAppPreviewCropBoundaryBehaviors": 1,
            "mappedMarkerAppPreviewCropBoundaryBehaviors": 1,
            "pdfPredefinedVerticalCMapWidthBehaviors": 1,
            "mappedPdfPredefinedVerticalCMapWidthBehaviors": 1,
            "pdfStructTreeThreadPrecedenceBehaviors": 1,
            "mappedPdfStructTreeThreadPrecedenceBehaviors": 1,
            "pdfIndirectType0EncodingNameBehaviors": 1,
            "mappedPdfIndirectType0EncodingNameBehaviors": 1,
            "pdfXrefStreamObjectStreamOmittedIndexRepairBehaviors": 1,
            "mappedPdfXrefStreamObjectStreamOmittedIndexRepairBehaviors": 1,
            "pdfTokenAwareDirectStreamOwnerBehaviors": 1,
            "mappedPdfTokenAwareDirectStreamOwnerBehaviors": 1,
            "pdfMarkupUserUnitQuadPointBehaviors": 1,
            "mappedPdfMarkupUserUnitQuadPointBehaviors": 1,
            "pdfAcroFormAppearanceValueActionBehaviors": 1,
            "mappedPdfAcroFormAppearanceValueActionBehaviors": 1,
            "mappedCatalogDestinationMetadataBehaviors": 1,
            "nativeScenarios": {
                "pdfAnnotationPopupAppearanceActionReview": "Generic page annotations expose nested popup review rows, selected /AP /N state, /Dest, /A, /AA, and chained /Next action metadata without executing PDF actions or leaking popup/action/stale appearance text into WordPress content.",
                "pdfAcroFormNestedNextActionReview": "AcroForm field/widget /A and /AA /Next chains are walked through single dictionaries and arrays with bounded cycle-safe review metadata; URI, Launch, Hide, JavaScript, Named, and GoTo actions stay non-executing while field /V remains the import value.",
                "pdfXrefUnselectedObjectStreamBoundary": "When selected xref entries exist, /ObjStm members are expanded only for selected type-2 rows so stale unselected compressed page-tree members cannot overwrite the current trailer-selected direct page tree before WordPress text extraction.",
                "pdfRichMediaScreenRenditionCurrentAction": "Screen Rendition actions expose OP 4 play-or-resume, current-associated stop/pause/resume scope when /R is omitted, and review-only /JS preview/hash metadata while media, appearance, and script payloads remain non-executing and hidden from visible text.",
                "pdfAnnotationStandardActionReview": "Standard annotation Named, ImportData, SubmitForm, Hide, and ResetForm actions are exposed as non-executing review metadata while selected appearance text remains visible and popup/action target strings stay out of WordPress paragraphs.",
                "pdfOutlineActionNavigationReview": "Outline item /A action dictionaries and chained /Next followups surface as non-executing navigation review metadata while local targets inherit page labels, page transitions, and page action context without leaking URI or JavaScript operands into visible WordPress paragraphs."
            },
            "pdfAnnotationPopupAppearanceActionReviewBehaviors": 1,
            "mappedPdfAnnotationPopupAppearanceActionReviewBehaviors": 1,
            "pdfAcroFormNestedNextActionReviewBehaviors": 1,
            "mappedPdfAcroFormNestedNextActionReviewBehaviors": 1,
            "pdfXrefUnselectedObjectStreamBoundaryBehaviors": 1,
            "mappedPdfXrefUnselectedObjectStreamBoundaryBehaviors": 1,
            "pdfRichMediaScreenRenditionCurrentActionBehaviors": 1,
            "mappedPdfRichMediaScreenRenditionCurrentActionBehaviors": 1,
            "pdfAnnotationStandardActionReviewBehaviors": 1,
            "mappedPdfAnnotationStandardActionReviewBehaviors": 1,
            "pdfOutlineActionNavigationReviewBehaviors": 1,
            "mappedPdfOutlineActionNavigationReviewBehaviors": 1,
            "pdfWidgetAnnotationLinkBehaviors": 1,
            "mappedPdfWidgetAnnotationLinkBehaviors": 1,
            "pdfImageSoftMaskFilterBoundaryBehaviors": 1,
            "mappedPdfImageSoftMaskFilterBoundaryBehaviors": 1,
            "pdfXrefStreamType2ObjectStreamBaseBoundaryBehaviors": 1,
            "mappedPdfXrefStreamType2ObjectStreamBaseBoundaryBehaviors": 1,
            "pdfParserStreamDictionaryEscapeBoundaryBehaviors": 1,
            "mappedPdfParserStreamDictionaryEscapeBoundaryBehaviors": 1,
            "pdfFontToUnicodeWidthResourceBoundaryBehaviors": 1,
            "mappedPdfFontToUnicodeWidthResourceBoundaryBehaviors": 1,
            "pdfType0IndirectCidFontWidthBehaviors": 1,
            "mappedPdfType0IndirectCidFontWidthBehaviors": 1,
            "pdfObjectStreamFilterOwnerBoundaryBehaviors": 1,
            "mappedPdfObjectStreamFilterOwnerBoundaryBehaviors": 1,
            "mappedPdfAcroFormWidgetAppearanceCalcOrderReviewBehaviors": 1,
            "mappedPdfFontEncodingDifferencesCMapWidthReviewBehaviors": 1,
            "mappedPdfFontWidthCMapFallbackFlagsReviewBehaviors": 1,
            "mappedPdfNavigationStructElemPageLabelReviewBehaviors": 1,
            "mappedPdfMetadataStructureOutputIntentFilespecReviewBehaviors": 1,
            "mappedPdfOutlineNamedDestinationActionThreadReviewBehaviors": 1,
            "mappedPdfParserInlineStreamLengthFilterRepairBehaviors": 1,
            "mappedPdfParserXrefStreamFilterLengthOwnerReviewBehaviors": 1,
            "mappedPdfXrefLinearizedObjectStreamHintRepairBehaviors": 1,
            "mappedPdfXrefObjectStreamFreeEntryPrevReviewBehaviors": 1,
            "mappedPdfAcroFormSignatureXfaWidgetActionReviewBehaviors": 1,
            "mappedTableOcrRowspanColspanContinuationReviewBehaviors": 1,
            "mappedPdfPageStructParentsResourcesTransitionLabelReviewBehaviors": 1,
            "mappedPdfSecurityByteRangeDssDocMdpReviewBehaviors": 1,
            "markerBenchmarkRuntimeApiCurrentBaseBehaviors": 1,
            "mappedMarkerBenchmarkRuntimeApiCurrentBaseBehaviors": 1,
            "pdfOutlineDestinationActionContextCurrentBaseBehaviors": 1,
            "mappedPdfOutlineDestinationActionContextCurrentBaseBehaviors": 1,
            "pdfXrefCurrentBaseRepairBoundaryBehaviors": 1,
            "mappedPdfXrefCurrentBaseRepairBoundaryBehaviors": 1,
            "pdfSecurityAcroFormPermissionActionCurrentBaseBehaviors": 1,
            "mappedPdfSecurityAcroFormPermissionActionCurrentBaseBehaviors": 1,
            "pdfAcroFormXfaWidgetCurrentBaseBehaviors": 1,
            "mappedPdfAcroFormXfaWidgetCurrentBaseBehaviors": 1,
            "tableOcrPolygonGeometryCurrentBaseBehaviors": 1,
            "mappedTableOcrPolygonGeometryCurrentBaseBehaviors": 1,
            "pdfMetadataCatalogNameTreeCurrentBaseBehaviors": 1,
            "mappedPdfMetadataCatalogNameTreeCurrentBaseBehaviors": 1,
            "pdfImageNamedColorSpaceSmaskCurrentBaseBehaviors": 1,
            "mappedPdfImageNamedColorSpaceSmaskCurrentBaseBehaviors": 1,
            "markerRuntimeConversionBoundaryCurrentBaseBehaviors": 1,
            "mappedMarkerRuntimeConversionBoundaryCurrentBaseBehaviors": 1,
            "markerRuntimeSinglePreflightBoundaryCurrentBaseBehaviors": 1,
            "mappedMarkerRuntimeSinglePreflightBoundaryCurrentBaseBehaviors": 1,
            "pdfAcroFormActionBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfAcroFormActionBoundaryCurrentBaseBehaviors": 1,
            "pdfFontCidEncodingWidthCurrentBaseBehaviors": 1,
            "mappedPdfFontCidEncodingWidthCurrentBaseBehaviors": 1,
            "pdfImageInlineMaskPreviewCurrentBaseBehaviors": 1,
            "mappedPdfImageInlineMaskPreviewCurrentBaseBehaviors": 1,
            "pdfMetadataAssociatedRelatedFilesCurrentBaseBehaviors": 1,
            "mappedPdfMetadataAssociatedRelatedFilesCurrentBaseBehaviors": 1,
            "pdfOutlineNameTreeLimitsCurrentBaseBehaviors": 1,
            "mappedPdfOutlineNameTreeLimitsCurrentBaseBehaviors": 1,
            "pdfPageAnnotationThreadCurrentBaseBehaviors": 1,
            "mappedPdfPageAnnotationThreadCurrentBaseBehaviors": 1,
            "pdfParserXrefStreamBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfParserXrefStreamBoundaryCurrentBaseBehaviors": 1,
            "pdfSecurityDssSignatureCurrentBaseBehaviors": 1,
            "mappedPdfSecurityDssSignatureCurrentBaseBehaviors": 1,
            "pdfXrefObjectStreamDuplicateZeroWidthCurrentBaseBehaviors": 1,
            "mappedPdfXrefObjectStreamDuplicateZeroWidthCurrentBaseBehaviors": 1,
            "tableGridBorderAssignedReviewCurrentBaseBehaviors": 1,
            "mappedTableGridBorderAssignedReviewCurrentBaseBehaviors": 1,
            "pdfMetadataTrailerIdLangViewerPreferenceCurrentBaseBehaviors": 1,
            "mappedPdfMetadataTrailerIdLangViewerPreferenceCurrentBaseBehaviors": 1,
            "pdfXrefLinearizedPrevHintStartxrefCurrentBaseBehaviors": 1,
            "mappedPdfXrefLinearizedPrevHintStartxrefCurrentBaseBehaviors": 1,
            "pdfRichMediaScreenSelectorRenditionCurrentBaseBehaviors": 1,
            "mappedPdfRichMediaScreenSelectorRenditionCurrentBaseBehaviors": 1,
            "pdfAnnotationActionReferenceCoverageCurrentBaseBehaviors": 2,
            "mappedPdfAnnotationActionReferenceCoverageCurrentBaseBehaviors": 2,
            "suppliedImageOnlyOcrHandoffCurrentBaseBehaviors": 1,
            "mappedSuppliedImageOnlyOcrHandoffCurrentBaseBehaviors": 1,
            "pdfImageDeviceNSeparationSmaskDecodeCurrentBaseBehaviors": 1,
            "mappedPdfImageDeviceNSeparationSmaskDecodeCurrentBaseBehaviors": 1,
            "pdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBaseBehaviors": 1,
            "mappedPdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBaseBehaviors": 1,
            "tableRotatedHeaderAccessibilityGridCurrentBaseBehaviors": 1,
            "mappedTableRotatedHeaderAccessibilityGridCurrentBaseBehaviors": 1,
            "pdfFontType3ColorGlyphResourceWidthCurrentBaseBehaviors": 1,
            "mappedPdfFontType3ColorGlyphResourceWidthCurrentBaseBehaviors": 1,
            "pdfFontType3CharProcsGenerationBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfFontType3CharProcsGenerationBoundaryCurrentBaseBehaviors": 1,
            "layoutPageHeaderFooterRotatedColumnsCurrentBaseBehaviors": 1,
            "mappedLayoutPageHeaderFooterRotatedColumnsCurrentBaseBehaviors": 1,
            "pdfParserObjectStreamGenerationOffsetOwnerCurrentBaseBehaviors": 1,
            "mappedPdfParserObjectStreamGenerationOffsetOwnerCurrentBaseBehaviors": 1,
            "pdfMetadataXmpLangMarkInfoCatalogCurrentBaseBehaviors": 1,
            "mappedPdfMetadataXmpLangMarkInfoCatalogCurrentBaseBehaviors": 1,
            "pdfOutlineDirectNamedThreadActionCurrentBaseBehaviors": 1,
            "mappedPdfOutlineDirectNamedThreadActionCurrentBaseBehaviors": 1,
            "pdfXrefObjectStreamPrevGenerationRebuildCurrentBaseBehaviors": 1,
            "mappedPdfXrefObjectStreamPrevGenerationRebuildCurrentBaseBehaviors": 1,
            "pdfPageAssociatedFilesMarkedContentAltCurrentBaseBehaviors": 1,
            "mappedPdfPageAssociatedFilesMarkedContentAltCurrentBaseBehaviors": 1,
            "markerRuntimeServerUploadPaginationErrorBoundaryCurrentBaseBehaviors": 1,
            "mappedMarkerRuntimeServerUploadPaginationErrorBoundaryCurrentBaseBehaviors": 1,
            "markerRuntimePreviewArtifactBoundaryCurrentBaseBehaviors": 1,
            "mappedMarkerRuntimePreviewArtifactBoundaryCurrentBaseBehaviors": 1,
            "pdfAttachmentFileSpecAFRelationshipChecksumCurrentBaseBehaviors": 1,
            "mappedPdfAttachmentFileSpecAFRelationshipChecksumCurrentBaseBehaviors": 1,
            "pdfAttachmentPageAssociatedFilePreflightCurrentBaseBehaviors": 1,
            "mappedPdfAttachmentPageAssociatedFilePreflightCurrentBaseBehaviors": 1,
            "pdfFontSimpleType3CMapSpacingCurrentBaseBehaviors": 1,
            "mappedPdfFontSimpleType3CMapSpacingCurrentBaseBehaviors": 1,
            "tableOcrStructureAssignmentRegressionCurrentBaseBehaviors": 1,
            "mappedTableOcrStructureAssignmentRegressionCurrentBaseBehaviors": 1,
            "markerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBaseBehaviors": 1,
            "mappedMarkerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBaseBehaviors": 1,
            "pdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBaseBehaviors": 1,
            "pdfOutlineNamedDestinationTransitionThreadSecurityCurrentBaseBehaviors": 1,
            "mappedPdfOutlineNamedDestinationTransitionThreadSecurityCurrentBaseBehaviors": 1,
            "pdfPageStructTreeMarkedContentAssociatedFilesCurrentBaseBehaviors": 1,
            "mappedPdfPageStructTreeMarkedContentAssociatedFilesCurrentBaseBehaviors": 1,
            "pdfParserSecurityXrefFilterErrorBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfParserSecurityXrefFilterErrorBoundaryCurrentBaseBehaviors": 1,
            "pdfXrefHybridLinearizedObjectStreamGenerationCurrentBaseBehaviors": 1,
            "mappedPdfXrefHybridLinearizedObjectStreamGenerationCurrentBaseBehaviors": 1,
            "mappedPdfInlineImageTokenizerBoundaryCurrentBaseBehaviors": 1,
            "mappedTableGridGeometryBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfTextDictionaryKeepCharsBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfParserXrefStreamIndirectIndexWidthCurrentBaseBehaviors": 1,
            "mappedPdfCcittFaxFilterBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfParserMalformedCMapFilterLiteralCurrentBaseBehaviors": 1,
            "mappedPdfInlineDctDecodeFilterBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfXrefObjectStreamSkippedHeaderIndexCurrentBaseBehaviors": 1,
            "mappedPdfCMapDefaultWidthSourceFallbackCurrentBaseBehaviors": 1,
            "mappedPdfImageXObjectFormResourceBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfOutlineStructureElementMetadataCurrentBaseBehaviors": 1,
            "mappedPdfPageResourceInheritanceCurrentBaseBehaviors": 1,
            "mappedPdfTextDictionaryLayoutOrderSuppliedRangeCurrentBaseBehaviors": 1,
            "mappedMarkerRuntimeMainPreflightBoundaryCurrentBaseBehaviors": 3,
            "mappedTableNamedBboxGeometryBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfFontType3CharProcsFallbackBoundaryCurrentBaseBehaviors": 1,
            "mappedPdfLinkAnnotationPageGeometryCurrentBaseBehaviors": 1,
            "mappedPdfFontWidthAdvanceBoundaryCurrentBaseBehaviors": 3,
            "markerRuntimeMainPreflightBoundaryCurrentBaseBehaviors": 3,
            "mappedPdfPagePartialExtractionDiagnosticsCurrentBaseBehaviors": 2
        },
        "benchmarkDocuments": [
            "multicolcnn.pdf",
            "switch_trans.pdf",
            "thinkpython.pdf",
            "thinkos.pdf",
            "thinkdsp.pdf",
            "crowd.pdf"
        ],
        "ciAssertions": 3,
        "ciAssertionSource": "scripts/verify_benchmark_scores.py checks multicolcnn.pdf > 0.34 and switch_trans.pdf > 0.40 for marker output, plus average table score >= 0.7 for table reports.",
        "externalBenchmarkArchive": {
            "downloadId": "1NHrdYatR1rtqs2gPVfdvO0BAvocH8CJi",
            "filename": "benchmark_data_short.zip",
            "bytes": 6212657,
            "sha256": "c7511a4f5055e949a7a7c293be5541942433059d7841965f056d7f9b441a41ad",
            "mappedPairs": [
                {
                    "document": "multicolcnn.pdf",
                    "pdfPath": "benchmark_data/pdfs/multicolcnn.pdf",
                    "pdfBytes": 851968,
                    "pdfSha256": "2b0e8314ff2c2680dd309ce46a49d740084d66eb39549337d2daa91215c426f8",
                    "referencePath": "benchmark_data/references/multicolcnn.md",
                    "referenceBytes": 30542,
                    "referenceSha256": "3ff96757b43e82595410f0fa50643945fec7c5c51f7c3d7562edefbd555aaa96"
                },
                {
                    "document": "switch_trans.pdf",
                    "pdfPath": "benchmark_data/pdfs/switch_trans.pdf",
                    "pdfBytes": 1304157,
                    "pdfSha256": "f340f6ace31abf7d0730ef461404279f40d3c890e9cc2daeb7068b3304afdbd6",
                    "referencePath": "benchmark_data/references/switch_trans.md",
                    "referenceBytes": 82456,
                    "referenceSha256": "74f8f6cbe23873304aa182b3a46edc8df896d35542aeade95bb93254823d58e1"
                }
            ]
        },
        "committedReferenceLikeArtifacts": {
            "markerMarkdownExamples": 4,
            "nougatMarkdownExamples": 4,
            "paths": [
                "data/examples/marker/*.md",
                "data/examples/nougat/*.md"
            ]
        },
        "runnerStatus": "not-executed",
        "runnerBlocker": "The upstream CI benchmark archive was downloaded and inspected for static evidence, and native BenchmarkRunner can replay benchmarks/overall.py-style folder/reference/method/report behavior over supplied callbacks. The full upstream runner still requires Poetry plus heavy Python dependencies including torch, surya-ocr, pdftext, pypdfium2, tabled-pdf, Texify, Nougat for comparison mode, and model downloads. The defensible local denominator is the cloned source inventory plus the two hash-identified benchmark_data_short.zip PDF/reference pairs; full conversion cannot execute locally under the modest network/CPU constraint without installing and running the ML/PDF model stack.",
        "latestReducedHandoff": "markerpdf-navigation-pdftext-currentbase-20260603T0917Z",
        "mappedSemantics": 409,
        "latestRunAddendum": "2026-06-03 UTC supervisor accepted markerPDF geometry/font/type3/link handoffs on source commit 2b759f764215c5b75af63ce797f50d5fb1e71f19: TableRecognizer now normalizes named bbox fields for supplied table geometry; PdfTextExtractor now applies quote-operator word and character spacing before styled-span bbox advance and excludes exact-generation Type3 CharProc streams from stream-only fallback text; PdfLinkAnnotationExtractor now maps link rectangles through inherited CropBox, Rotate, and page-local UserUnit before supplied pdftext span promotion. Focused gate passed 15 files / 1259 assertions / 0 failures; four WordPress/example smokes emitted non-empty output; full markerPDF lane passed 308 files / 16522 assertions / 0 failures / 1016 PASS lines. No GPU/model/OCR execution was run. Behavior tests move 1012 -> 1016 pass / 0 fail; mapped semantics move 698 / 78 -> 702 / 78. Root harness not run - full markerPDF lane run covered lanes/markerpdf/tests.",
        "warning": "This is a cloned static inventory and native no-GPU PHP behavior map, not upstream model-runner parity. User direction excludes GPU/model execution, so live OCR, Surya layout/reading-order/OCR/table-cell models, Texify equation recognition, Torch/model batching, Streamlit/FastAPI model workers, page-pixel visual table recognition, and exact upstream model benchmark parity are intentionally unavailable. Current dashboard coverage maps 702 / 78 tracked upstream repository paths with 1016 PHP behavior PASS lines and 0 failures.",
        "pdfTextStreamFilterErrorBoundaryBehaviors": 1,
        "mappedPdfTextStreamFilterErrorBoundaryBehaviors": 1,
        "mappingExamples": {
            "0": "PDF stream filters fail closed when declared filters are unsupported, corrupt, unresolved indirect references, or become unknown after an earlier decoder, preventing raw filtered bytes from leaking into WordPress paragraph text.",
            "1": "PDF FontDescriptor values resolve indirect /FontName, /Flags, and /FontWeight references before styled span font names feed WordPress bold/italic Markdown cleanup.",
            "2": "PDF StructTreeRoot /RoleMap values map raw tagged-content roles to standard H1/P/Figure review roles while preserving ActualText and Alt replacements and excluding Artifact MCIDs.",
            "3": "UTF-16 XDP XFA stream packet metadata decodes BOM-prefixed /AcroForm /XFA stream XML, exposes xdp:xdp packet names and encoding review metadata, and keeps dynamic XFA data out of merged visible fields without executing XFA JavaScript.",
            "4": "PDF stream payload recovery honors declared /Length only when it lands cleanly at an endstream boundary, recovers stale or missing lengths with bounded endstream terminators, and keeps unsupported filters fail-closed before WordPress text extraction.",
            "5": "PDF incremental-update catalog recovery follows the latest startxref trailer or xref-stream /Root, falls back through /Prev and hybrid /XRefStm only when needed, and selects the current catalog generation before stale catalog scanning.",
            "6": "PDF simple-font encoding maps StandardEncoding, MacRomanEncoding via BaseEncoding, and implicit SymbolEncoding from BaseFont Symbol before WordPress paragraph rendering while preserving WinAnsi and Differences behavior.",
            "7": "PDFDocEncoding trailer /Info text strings decode bullet, ligatures, smart quotes, minus, per-mille, Lslash/lslash, Euro, and Latin-1 bytes before WordPress metadata review while preserving UTF-16 BOM handling.",
            "pdfInlineImageAbbrevDecodeParms": "inline image BI/ID/EI abbreviation and DecodeParms handling validates compressed image payload boundaries before accepting EI markers, preventing fake text bytes from leaking into WordPress paragraphs without Python/models/external PDF tools",
            "pdfIndirectDestinationViewOperands": "outline and catalog destination arrays resolve indirect view-mode and coordinate operands before WordPress navigation review metadata, preserving FitV/FitR/XYZ parameters without Python/models/external PDF tools",
            "pdfObjectStreamNestedTokenBoundary": "PDF object-stream member recovery uses token-aware direct object boundaries so fake obj/endobj/stream text inside strings, dictionaries, arrays, comments, and stream payloads cannot leak fallback text into WordPress paragraphs without Python/models/external PDF tools",
            "pdfType0EncodingCMapWidthPriority": "Type0 font /Encoding CMaps map source codes to descendant CIDs before CIDFont width and vertical displacement lookup, preserving WordPress paragraph grouping such as WideBlock and Thin Text without Python/models/external PDF tools",
            "pdfFilespecPayloadBoundary": "Filespec /EF payload streams, including streams without /Type /EmbeddedFile and dictionaries containing fake object-boundary tokens, stay review-only and are excluded from fallback WordPress paragraph extraction without Python/models/external PDF tools",
            "pdfSubsetLigatureGlyphNames": "subset simple-font Encoding Differences glyph names, including f_f_i.alt, f_i, endash, eacute, and Euro, decode to Unicode before WordPress paragraph rendering without Python/models/external PDF tools",
            "pdfAcroFormWidgetDefaultState": "AcroForm button fields compare /DV against effective widget /AS fallback state, normalizing /Off as unchecked, so default-checked checkbox and radio widgets are not falsely marked changed before WordPress review rendering",
            "pdfPageBoxIndirectOperands": "Page-box rectangle arrays with indirect numeric operands resolve before marker_app-style WordPress preview sizing, preserving inherited rotation, page-local UserUnit, and rendered image dimensions without Python/models/external PDF tools",
            "pdfFormXObjectMatrixBBox": "Invoked Form XObjects apply caller graphics-state cm plus form /Matrix to text positions and clip form-local text outside /BBox before Gutenberg paragraphs without Python/models/external PDF tools",
            "suppliedMergedTableBoundaries": "Supplied-document conversion merges adjacent table layout boxes before protecting Formula and Picture regions, preserving one table block while excluding duplicate equation and image placeholders without Python/models/external PDF tools",
            "pdfImageDecodeStencilPreview": "Base image /Decode arrays and /ImageMask stencil decode arrays are represented by PdfImageRenderer preview metadata, including component mismatch review, decoded sample values, default stencil decode [0 1], inverted mask opacity, and RGB preview intent without pypdfium/PIL execution.",
            "pdfAnnotationAppearanceResourceBoundary": "Selected annotation /AP /N appearance streams are treated as Form XObjects with scoped resources, Matrix, and BBox clipping, preserving visible current/nested appearance text while excluding out-of-bounds, stale/off, and unreferenced appearance noise.",
            "pdfAcroFormCalculationSignatureState": "Catalog /AcroForm /SigFlags and /CO, field/widget calculate actions, signed signature dictionaries, ByteRange metadata, and /Lock field-state effects are represented as review-only metadata without executing calculations, JavaScript, signature validation, or signing.",
            "pdfType0EncodingCMapBoundary": "Type0 /Encoding CMap begincodespacerange and begincidrange data are used as no-ToUnicode fallback source segmentation for descendant CIDFont /W and /DW width grouping, preserving WideBlock and Thin Text without NUL bytes or false spacing.",
            "pdfMarkInfoPageAssociatedFiles": "A PDF 2.0 page with catalog /MarkInfo plus page /AF Source and Alternative Filespec entries emits MarkInfo flags, relationship, MIME type, size, and SHA-256 review metadata while excluding embedded source/preview payload text from Gutenberg paragraphs.",
            "pdfHybridXrefFreeEntryConflict": "A PDF 1.5 hybrid xref table marks page object 4 free while /XRefStm advertises the same object as an object-stream member; native text extraction emits only the current direct page and excludes the stale object-stream page.",
            "pdfAcroFormDefaultResourceReview": "A PDF AcroForm with form-level /DR /Font resources resolves Helv and Body default-appearance fonts, preserves an unresolved widget /Missing resource, and reports execution/rendering flags as false.",
            "pdfCatalogOpenActionNextChain": "A PDF catalog OpenAction URI with /Next Launch and GoTo actions emits three review rows, marks the followups chained, dedupes repeated Next entries, and keeps all executes_on_import flags false.",
            "pdfCurrentXrefStreamEncryptedMetadataBoundary": "wordpress-pdf-current-xref-encrypted-metadata-import.php exercises latest xref-stream trailer /Encrypt with EncryptMetadata false, proving XMP dates and trailer IDs are preserved while encrypted Info/OutputIntent/text/key material stay hidden.",
            "pdfCompositeNavigationReviewMetadata": "wordpress-pdf-outline-openaction-page-label-transition.php exercises a two-page document with PageLabels, outline destinations, GoTo OpenAction, chained URI action, and page transition metadata while proving actions remain review-only.",
            "pdfSuppliedOcrPredictionTable": "Supplied OCR prediction objects with text_lines or lines are unwrapped before forced table recognition, so OCR text fills table cells and stale pdftext table lines are excluded without Python/models/external PDF tools.",
            "pdfIndexedIccJbigSoftMaskPreview": "Indexed image color spaces with ICCBased bases, JBIG2 preview-only filters, JBIG2Globals, and soft-mask Decode arrays are planned before RGB preview conversion while avoiding pypdfium/PIL/Python execution.",
            "pdfParserStreamFilterObjectBoundary": "Fallback stream decoding uses current xref-selected direct stream objects and top-level stream dictionaries, excluding stale/free filtered objects and fake nested stream tokens before WordPress paragraph rendering.",
            "pdfCatalogPieceInfoMetadataBoundary": "Catalog PieceInfo private Metadata and OutputIntents are exposed as review-only catalog.piece_info dictionaries while document XMP and PDF/A metadata roots ignore those nested private entries.",
            "pdfOutlineNameTreeTargetPresentation": "Outline destination rows resolved through name trees carry target page display duration, transition metadata, and review-only page actions while keeping URI/action targets out of visible WordPress text.",
            "pdfTextOperatorSourceSpaceWordSpacing": "PDF text-showing operators count Tw word spacing from source glyph code 0x20 when ToUnicode replacement strings contain decoded spaces or bidi isolate controls, preventing over-advance and preserving positioned word gaps before WordPress paragraph rendering.",
            "pdfSimpleFontIndirectFirstCharWidths": "Simple-font /Widths arrays are indexed from /FirstChar after resolving indirect numeric operands, preserving text advance grouping for subset /Encoding fonts before WordPress paragraph rendering.",
            "pdfHybridXrefCurrentTableGenerationPrecedence": "Current hybrid xref table rows remain authoritative over conflicting companion /XRefStm entries for the same object number, preventing stale compressed previous-generation page text from replacing live direct-generation page text.",
            "pdfSignatureReferenceTransformReview": "PDF signature /Reference dictionaries expose FieldMDP and UR3 transform metadata, field names, right categories, digest presence, and review-only execution flags while suppressing digest/signature bytes and avoiding validation, signing, or rights enforcement.",
            "pdfSecurityByteRangeDssRevisionCurrentBase": "Signed PDFs now distinguish prior-revision and current-revision ByteRange coverage and correlate DSS VRI evidence with object placement as review-only security metadata, without signature validation, revocation checks, action execution, decryption, or raw payload exposure.",
            "pdfPageStructParentUserPropertyCurrentBase": "Tagged PDF page StructParents resolve through ParentTree rows so user properties, roles, MCIDs, and titles stay attached to page review metadata without leaking review-only text into visible WordPress paragraphs.",
            "tableOcrBorderConflictGridCurrentBase": "Forced-OCR table cells crossing detector grid borders keep assigned row/column ids and spanning render-cell links so WordPress table review can inspect border conflicts without stale pdftext promotion or Python/model execution.",
            "pdfParserFilterArrayDictOwnerCurrentBase": "PDF stream filter arrays that contain dictionaries now fail closed before current-base text extraction, preventing raw or malformed stream bytes from becoming WordPress paragraph text without Python/models/external PDF tools.",
            "pdfImageDeviceNTransferJpxBoundaryCurrentBase": "DeviceN and Separation image JPX streams remain review-only while tint metadata, transfer-function soft masks, and SMaskInData precedence are preserved before WordPress image preview planning without raster execution.",
            "pdfMetadataAssociatedFileSchemaCurrentBase": "PDF Portfolio and associated-file collection schema rows now propagate field priorities, descriptions, value dictionaries, checksums, and payload omission metadata into WordPress review rows without promoting attachment payload text.",
            "pdfFontCidSetIndirectWidthBoundaryCurrentBase": "CIDFont /FontDescriptor /CIDSet streams resolve through exact-generation indirect references before default-width grouping so stale same-number CIDSet data cannot introduce false WordPress text gaps.",
            "pdfXrefStreamPrevIndexGenerationCurrentBase": "Sparse xref-stream /Index rows with /Prev generation noise now preserve valid previous type-2 object-stream rows only when the carrier storage still matches current offsets before WordPress text extraction.",
            "markerRuntimeServerConfigErrorBoundaryCurrentBase": "marker_server.py-style startup config is represented as structured host/port/API-key/upload-directory/error planning metadata, with invalid ports and upload-directory failures returned before FastAPI/Uvicorn/Python/model execution.",
            "pdfPageAnnotationParentTreeWidgetCurrentBase": "Widget annotations now inherit parent-field StructParent metadata only when their OBJR points to the current page widget, preserving tagged annotation review context without leaking action or field payload text into WordPress paragraphs.",
            "tableHeaderGridRowspanCurrentBase": "Rowspanned first-column and grouped top-row headers now export column_header_rows, ids, scopes, and data-cell header references so WordPress table imports keep accessible header relationships after Markdown formatting.",
            "pdfSecurityCertPermissionOpenActionCurrentBase": "Certified PDFs now report catalog OpenAction chains alongside DocMDP/certificate permission review metadata while certificate bytes, signature bytes, JavaScript, Launch, URI actions, validation, decryption, and enforcement remain non-executing.",
            "pdfAcroFormResourceActionFileSpecCurrentBase": "AcroForm non-JavaScript action FileSpec dictionaries now expose embedded resource review metadata, Params, hashes, and omission boundaries without submitting/importing files, executing actions, or leaking payload bytes into WordPress content.",
            "pdfParserNameEscapeArrayBoundaryCurrentBase": "Escaped PDF names inside array operands now split at true array boundaries, preventing merged name tokens from bypassing optional-content and parser safety decisions before WordPress text extraction.",
            "pdfImageIndexedColorKeyTransferCurrentBase": "Indexed ColorKey masks now apply against raw palette indexes before Decode transfer while decoded preview rows keep alpha review metadata without raster execution.",
            "pdfFontType0CMapDescriptorWidthCurrentBase": "Type0 resource CMaps now feed descendant CIDFont descriptor widths and FontDescriptor flags before WordPress text-gap grouping, preserving current-base text layout without Python/pdftext execution.",
            "pdfFontType0Type3BroaderBehaviorCurrentBase": "Type0 direct-referenced DescendantFonts dictionaries and Type3 CMap CharProc d0/d1 widths now feed native text advance grouping before WordPress extraction without Python/pdftext execution.",
            "pdfOutlineStructureDestinationPageContextCurrentBase": "Outline structure state, style, color, destination page labels, transitions, and actions now enrich navigation review rows without promoting dictionary/action text into visible WordPress paragraphs.",
            "pdfXrefStreamPrevGenerationIndexCurrentBase": "Current xref-stream generation Index rows now preserve metadata imports while excluding stale /Prev duplicate rows before WordPress document metadata and text extraction."
        },
        "pdfTextIndirectFontDescriptorBehaviors": 1,
        "mappedPdfTextIndirectFontDescriptorBehaviors": 1,
        "pdfStructTreeRoleMapBehaviors": 1,
        "mappedPdfStructTreeRoleMapBehaviors": 1,
        "pdfAcroFormXfaXdpStreamBehaviors": 1,
        "mappedPdfAcroFormXfaXdpStreamBehaviors": 1,
        "pdfTextStreamLengthRecoveryBehaviors": 1,
        "mappedPdfTextStreamLengthRecoveryBehaviors": 1,
        "pdfTrailerRootGenerationRecoveryBehaviors": 1,
        "mappedPdfTrailerRootGenerationRecoveryBehaviors": 1,
        "pdfTextStandardMacRomanSymbolEncodingBehaviors": 1,
        "mappedPdfTextStandardMacRomanSymbolEncodingBehaviors": 1,
        "pdfDocEncodingMetadataBehaviors": 1,
        "mappedPdfDocEncodingMetadataBehaviors": 1,
        "pdfInlineImageAbbrevDecodeParmsBehaviors": 1,
        "mappedPdfInlineImageAbbrevDecodeParmsBehaviors": 1,
        "pdfIndirectDestinationViewOperandBehaviors": 1,
        "mappedPdfIndirectDestinationViewOperandBehaviors": 1,
        "pdfObjectStreamNestedTokenBoundaryBehaviors": 1,
        "mappedPdfObjectStreamNestedTokenBoundaryBehaviors": 1,
        "pdfType0EncodingCMapWidthPriorityBehaviors": 1,
        "mappedPdfType0EncodingCMapWidthPriorityBehaviors": 1,
        "pdfFilespecPayloadBoundaryBehaviors": 1,
        "mappedPdfFilespecPayloadBoundaryBehaviors": 1,
        "pdfSubsetLigatureGlyphNameBehaviors": 1,
        "mappedPdfSubsetLigatureGlyphNameBehaviors": 1,
        "pdfAcroFormWidgetDefaultStateBehaviors": 1,
        "mappedPdfAcroFormWidgetDefaultStateBehaviors": 1,
        "pdfPageBoxIndirectOperandBehaviors": 1,
        "mappedPdfPageBoxIndirectOperandBehaviors": 1,
        "pdfFormXObjectMatrixBBoxBehaviors": 1,
        "mappedPdfFormXObjectMatrixBBoxBehaviors": 1,
        "suppliedMergedTableBoundaryBehaviors": 1,
        "mappedSuppliedMergedTableBoundaryBehaviors": 1,
        "pdfImageDecodeStencilPreviewBehaviors": 1,
        "mappedPdfImageDecodeStencilPreviewBehaviors": 1,
        "pdfAnnotationAppearanceResourceBoundaryBehaviors": 1,
        "mappedPdfAnnotationAppearanceResourceBoundaryBehaviors": 1,
        "pdfAcroFormCalculationSignatureStateBehaviors": 1,
        "mappedPdfAcroFormCalculationSignatureStateBehaviors": 1,
        "pdfType0EncodingCMapBoundaryBehaviors": 1,
        "mappedPdfType0EncodingCMapBoundaryBehaviors": 1,
        "mappedPdfAcroFormXfaSignatureWidgetStateBehaviors": 1,
        "pdfAcroFormXfaSignatureWidgetStateBehaviors": 1,
        "pdfSecurityPreflightDecisionBehaviors": 1,
        "mappedPdfSecurityPreflightDecisionBehaviors": 1,
        "pdfFilespecPieceInfoPrivateStreamBehaviors": 1,
        "mappedPdfFilespecPieceInfoPrivateStreamBehaviors": 1,
        "mappedPdfRichMediaActionPopupBoundaryBehaviors": 1,
        "pdfRichMediaActionPopupBoundaryBehaviors": 1,
        "mappedPdfPageLabelNumberTreeLimitBehaviors": 1,
        "mappedPdfViewerPreferenceBoundedOperandBehaviors": 1,
        "pdfPageLabelNumberTreeLimitBehaviors": 1,
        "pdfViewerPreferenceBoundedOperandBehaviors": 1,
        "mappedPdfPageContentsIndirectArrayResourceBehaviors": 1,
        "pdfPageContentsIndirectArrayResourceBehaviors": 1,
        "pdfMarkInfoPageAssociatedFileBehaviors": 1,
        "mappedPdfMarkInfoPageAssociatedFileBehaviors": 1,
        "pdfHybridXrefFreeEntryConflictBehaviors": 1,
        "mappedPdfHybridXrefFreeEntryConflictBehaviors": 1,
        "pdfAcroFormDefaultResourceReviewBehaviors": 1,
        "mappedPdfAcroFormDefaultResourceReviewBehaviors": 1,
        "pdfCatalogOpenActionNextChainBehaviors": 1,
        "mappedPdfCatalogOpenActionNextChainBehaviors": 1,
        "nativeScenarios": {
            "pdfLinearizedIncrementalHintObjectBoundary": "Linearized PDF /H hint ranges resolve indirect numeric operands and remove matching hint stream objects before incremental current-root page text import.",
            "pdfMovieSoundRenditionPopupBoundary": "Movie, Sound, and Rendition annotation actions expose target annotations, media files, sound stream metadata, rendition operation labels, media clip details, and popup review rows without executing media or leaking payload text.",
            "pdfCMapSourceGlyphAdvanceBoundary": "ToUnicode CMap source-code boundaries provide fallback glyph advances when explicit font widths are absent, preserving word gaps for bidi isolate controls plus surrogate-pair scalars.",
            "pdfXrefStreamInvalidExplicitOffsetBoundary": "Xref-stream type-1 entries with explicit offsets require a valid direct object boundary; invalid explicit offsets no longer fall back to stale /Prev generation rows, while omitted offset fields keep generation repair behavior.",
            "pdfCurrentXrefStreamEncryptedMetadataBoundary": "Latest PDF xref-stream trailer /Encrypt dictionaries are authoritative metadata-source boundaries: EncryptMetadata false preserves XMP dates and trailer IDs while encrypted Info, OutputIntent metadata, raw key material, and visible text are suppressed without decryption.",
            "pdfCompositeNavigationReviewMetadata": "PDF outline destination view metadata, catalog OpenAction review actions, PageLabels, and target-page transitions are exposed as one composite non-executing navigation review payload for import UIs.",
            "pdfSuppliedOcrPredictionTable": "Forced OCR table pages accept upstream-shaped OCR prediction objects with text_lines/lines and render the recognized table before WordPress block conversion while excluding stale pdftext table text.",
            "pdfIndexedIccJbigSoftMaskPreview": "A PDF image XObject using /Indexed [/ICCBased ...], /JBIG2Decode with /JBIG2Globals, and inverted /SMask Decode emits review metadata and a WordPress preview swatch without raster execution.",
            "pdfParserStreamFilterObjectBoundary": "A PDF with stale/free compressed stream objects plus a current compressed stream emits only current text, and inline-image payloads containing fake stream/endstream tokens do not leak into visible text.",
            "pdfCatalogPieceInfoMetadataBoundary": "A PDF catalog with PieceInfo private Metadata and OutputIntents emits review metadata for the private dictionaries, but document xmp/output_intents remain empty and visible WordPress text excludes private ICC/XMP payloads.",
            "pdfOutlineNameTreeTargetPresentation": "A PDF outline with /Dest and GoTo entries resolving through a destination name tree emits Deck 5 outline rows with Fly transition and page open/close action review metadata, all non-executing.",
            "pdfTextOperatorSourceSpaceWordSpacing": "A one-glyph source code mapped through ToUnicode to RLI + A B + PDI is followed by positioned C; native extraction preserves the internal decoded space and a real positioned gap without treating the replacement-space as a source glyph space.",
            "pdfSimpleFontIndirectFirstCharWidths": "A PDF with subset Type1 fonts using indirect /FirstChar objects and direct/indirect /Widths emits WideBlock and Thin Text, proving wide subset glyphs stay joined and thin subset word gaps remain separated without external PDF tools.",
            "pdfHybridXrefCurrentTableGenerationPrecedence": "A PDF with a current xref table generation-1 page and a companion xref stream advertising a stale compressed generation-0 page emits only Current direct generation page and Hybrid table boundary kept while excluding stale compressed previous-generation text.",
            "pdfSignatureReferenceTransformReview": "A signed PDF with FieldMDP Include fields and UR3 usage-rights transform params imports visible text, emits review_required_signature_metadata with FieldMDP/UR3 methods, suppresses raw digest/signature bytes, and keeps validation/enforcement flags false.",
            "pdfRichMediaScreenActionTargetBoundary": "Screen annotation Movie/Rendition action target references are reviewed only when detached from the current page Annots list; stale target media dictionaries, file names, appearances, and payload text are not promoted into WordPress content.",
            "pdfAcroFormNonJavaScriptActionReview": "AcroForm field/widget URI, Launch, ImportData, Hide, Named, GoTo, and GoToR action dictionaries, including /Next chains, are emitted as review-only metadata; unsafe javascript: URI, launch, hide, and import-data actions never execute or import payloads during WordPress conversion.",
            "pdfAssociatedFilespecIndirectPieceInfo": "Catalog /AF associated Filespec /PieceInfo application dictionaries can be indirect objects; their /Private streams are exposed as checksum-only review metadata and excluded from fallback visible text extraction.",
            "pdftextTableLineStructureRouting": "Pdftext high-resolution blocks/lines/spans/chars are filtered to table bboxes, split by character gaps into table-local cells, and used to populate rows/columns-only recognized tables without OCR/model execution.",
            "pdfCidFontIndirectVerticalMetrics": "CIDFont /DW2 and /W2 vertical metric arrays resolve indirect objects before writing-mode 1 text advance grouping, preserving VertImport and DataFlow without Python, pdftext, pypdfium, or external PDF tools.",
            "pdfXrefStreamPrevIndexWidthBoundary": "PDF xref-stream /Prev chains with sparse /Index and zero-width /W fields keep the first current-section row when a malformed duplicate row points at stale content, preserving current page text without Python, pdftext, pypdfium, or external PDF tools.",
            "pdfAcroFormXfaSignatureBoundary": "AcroForm /XFA packets with template, datasets, and signature sections link approval.signature data paths to the AcroForm signature field as review metadata while static AcroForm values remain authoritative and signing, validation, XFA JavaScript, Python/models, and external PDF tools stay disabled.",
            "pdfDctDecodeCmykDecodePreview": "DCTDecode CMYK/YCCK image /Decode arrays, including indirect decode arrays, are applied before RGB preview conversion while component mismatches remain review-only metadata and Python/model/pypdfium/PIL/external PDF execution stays disabled.",
            "pdfRichMediaEmbeddedFilespecReview": "RichMedia target-instance /Asset FileSpec embedded streams expose review-only EF key/object, MIME type, declared and decoded sizes, SHA-256, Params Size, CheckSum, computed MD5 match state, and dates while media bytes, stale catalog media, JavaScript, playback, Python/models, and external PDF tools stay disabled.",
            "pdfParserXrefOffsetOwnerBoundary": "A PDF with a fake xref table embedded inside a stream object keeps the current page tree and excludes the stale stream-owned xref payload, proving xref table offsets owned by another direct object body are rejected before WordPress text extraction.",
            "pdfIndexedDecodeSoftMaskColorSpaceBoundary": "Indexed image color spaces default missing image Decode arrays to [0 hival], apply Decode before palette lookup, clip decoded indices to hival, and combine soft-mask Decode alpha before RGB preview without pypdfium/PIL/Python execution.",
            "pdfXrefObjectStreamFilterChainOperandRecovery": "Xref-selected object streams with indirect filter-chain operands recovered from helper object streams now decode the current page tree and exclude stale fallback streams before WordPress paragraph output without Python, pdftext, pypdfium, or external PDF tools.",
            "pdfXrefStreamObjectOwnerBoundary": "PDF xref-stream object headers embedded inside another stream payload can no longer own startxref or redirect WordPress text extraction; the current xref stream page remains authoritative while stale embedded xref stream text and owner carrier payload text stay excluded without Python, pdftext, pypdfium, or external PDF tools.",
            "tableOcrMergedCellGeometryReview": "Forced-OCR table cells in supplied markerPDF documents now preserve tabled-style row_ids and col_ids as table_merged_cell_geometry before Markdown drops non-anchor span occupancy, so WordPress table previews emit colspan and rowspan metadata while stale pdftext table lines stay excluded without Python, Surya, tabled, pdftext, pypdfium, or external PDF tools.",
            "pdfFontCidWidthCMapResource": "Named Type0 /Encoding CMap resources and UseCMap base resources resolve before CIDFont /W widths, preserving WideBlock and Thin Text spacing for WordPress paragraphs without Python/models/external PDF tools.",
            "pdfXrefStreamPrevHybridOwner": "Current xref-stream object-stream owners win over stale /Prev hybrid type-2 rows, excluding stale previous-page and replaced-carrier payload text while preserving current page text.",
            "pdfDocumentSecurityStoreIndirectFilter": "DSS Cert/OCSP/CRL/VRI timestamp validation streams resolve indirect Filter operands before decoded-byte hashing, while raw validation bytes and signature/revocation/trust-chain execution remain blocked.",
            "pdfOutlineDestinationActionTransition": "Outline /Dest values resolving to GoTo action dictionaries expose local destinations, chained URI/JavaScript followups, PageLabels, page /Dur, /Trans, and /AA rows as review-only navigation metadata without executing actions.",
            "pdfPageAssociatedTransitionAction": "Page /AF Filespec rows compose with page /Dur, /Trans, /AA URI/GoToR action review metadata while associated payloads and action operands stay out of visible WordPress text.",
            "tableMergedCellSpanStopBoundary": "Tabled-style row/column span expansion stops at the first unspanned band, preventing discontiguous table cells from exporting false colspan geometry.",
            "pdfIccDeviceNSoftMaskPreview": "DeviceN samples map through image Decode to named colorant tints, preserve ICCBased alternate metadata, and attach decoded soft-mask alpha before RGB preview without Python/PIL/pypdfium raster execution.",
            "pdfPortfolioAttachmentXmpOutputIntentReview": "Portfolio and associated FileSpec-local /Metadata XMP and /OutputIntents are emitted as attachment review metadata while XMP titles, ICC payloads, and PDF/A roots are not promoted to document metadata.",
            "pdfAcroFormSignatureFieldActionState": "Signed AcroForm signature fields summarize current /V signature dictionaries, selected widget appearance, /Lock field scope, and field/widget actions as review-only metadata without signing, validation, JavaScript, launch, or URI execution.",
            "pdfXrefStreamFilterDecodeParms": "Xref stream filter chains apply DecodeParms predictor rows before object selection so current xref-stream pages win and stale generation text stays excluded.",
            "pdfEncryptionPermissionHandlerReview": "Encrypted PDF Standard permission words with malformed reserved bits and unsupported non-Standard handlers are exposed as review-only permission_handler_review metadata while encrypted text, raw key material, decryption, and permission enforcement stay blocked.",
            "pdfType0IndirectDefaultWidth": "Type0 descendant CIDFont /DW operands stored as indirect objects resolve before text-advance grouping, preserving WordPress paragraph spacing without treating object numbers as widths.",
            "pdfXrefIncrementalFreeEntryGeneration": "Latest incremental xref-stream free generation rows suppress stale /Prev page and content objects so current replacement page text is imported and stale text is excluded.",
            "tableHeaderSpanningGridReview": "Supplied table recognition exports first-row and first-column spanning header grids with scoped th/td render metadata before Markdown drops covered span cells.",
            "pdfAcroFormXfaSignatureWidgetReview": "AcroForm signature widgets correlate XFA packet paths, signed /V state, widget flags, selected appearance state, seed constraints, locks, and actions as review-only metadata.",
            "pdfIccSoftMaskDecodeReview": "ICCBased image soft-mask Decode and Matte component metadata is reviewed before RGB preview; non-grayscale or component-mismatched masks remain review-only without raster execution.",
            "pdfNamedDestinationFitOperandReview": "Named destination Fit-family operands are normalized to fixed PDF view parameter counts before WordPress navigation metadata is emitted.",
            "pdfParserStreamDecodeParmsOwnerBoundary": "Direct-object stream owner scanning resolves simple indirect /Length values so fake /DecodeParms object headers embedded inside stream payloads cannot override current predictor dictionaries.",
            "pdfMetadataCollectionSchemaAssociatedReview": "Catalog /Collection schema, sort order, and associated FileSpec rows are exposed as review metadata with field values, checksums, attachment-local Metadata/OutputIntents, and payload omission boundaries.",
            "pdfCalibratedSoftMaskReview": "CalGray, CalRGB, and Lab image color-space review metadata and default Decode handling feed soft-mask alpha preview before RGB planning without raster execution.",
            "pdfXrefIncrementalObjectStreamFreeRepair": "Previous type-2 xref rows whose object-stream carrier was not selected are skipped before incremental free repair so stale compressed pages cannot leak into WordPress text.",
            "pdfObjectStreamNestedFilterBoundary": "Nested /Filter arrays inside object streams fail closed while safe direct fallback paragraph streams remain visible and xref stream payload text stays excluded.",
            "pdfOutlineRemoteDestinationActionReview": "Outline /Dest values resolving to GoToR action dictionaries are remote review metadata rather than same-document TOC rows, preserving local fallbacks and excluding remote operands from visible text."
        },
        "pdfCurrentXrefStreamEncryptedMetadataBoundary": 1,
        "mappedPdfCurrentXrefStreamEncryptedMetadataBoundary": 1,
        "pdfCompositeNavigationReviewMetadataBehaviors": 1,
        "mappedPdfCompositeNavigationReviewMetadataBehaviors": 1,
        "pdfSuppliedOcrPredictionTableBehaviors": 1,
        "mappedPdfSuppliedOcrPredictionTableBehaviors": 1,
        "pdfIndexedIccJbigSoftMaskPreviewBehaviors": 1,
        "mappedPdfIndexedIccJbigSoftMaskPreviewBehaviors": 1,
        "pdfParserStreamFilterObjectBoundaryBehaviors": 1,
        "mappedPdfParserStreamFilterObjectBoundaryBehaviors": 1,
        "pdfCatalogPieceInfoMetadataBoundaryBehaviors": 1,
        "mappedPdfCatalogPieceInfoMetadataBoundaryBehaviors": 1,
        "pdfOutlineNameTreeTargetPresentationBehaviors": 1,
        "mappedPdfOutlineNameTreeTargetPresentationBehaviors": 1,
        "pdfTextOperatorSourceSpaceWordSpacingBehaviors": 1,
        "mappedPdfTextOperatorSourceSpaceWordSpacingBehaviors": 1,
        "pdfSimpleFontIndirectFirstCharWidthBehaviors": 1,
        "mappedPdfSimpleFontIndirectFirstCharWidthBehaviors": 1,
        "pdfSimpleFontAverageWidthAdvanceBoundaryBehaviors": 1,
        "mappedPdfSimpleFontAverageWidthAdvanceBoundaryBehaviors": 1,
        "pdfHybridXrefCurrentTableGenerationPrecedenceBehaviors": 1,
        "mappedPdfHybridXrefCurrentTableGenerationPrecedenceBehaviors": 1,
        "pdfSignatureReferenceTransformReviewBehaviors": 1,
        "mappedPdfSignatureReferenceTransformReviewBehaviors": 1,
        "pdfRichMediaScreenActionTargetBoundaryBehaviors": 1,
        "mappedPdfRichMediaScreenActionTargetBoundaryBehaviors": 1,
        "pdfAcroFormNonJavaScriptActionReviewBehaviors": 1,
        "mappedPdfAcroFormNonJavaScriptActionReviewBehaviors": 1,
        "pdfAssociatedFilespecIndirectPieceInfoBehaviors": 1,
        "mappedPdfAssociatedFilespecIndirectPieceInfoBehaviors": 1,
        "pdftextTableLineStructureRoutingBehaviors": 1,
        "mappedPdftextTableLineStructureRoutingBehaviors": 1,
        "pdfDocumentSecurityStoreReviewBehaviors": 1,
        "mappedPdfDocumentSecurityStoreReviewBehaviors": 1,
        "pdfRichMediaExecuteTargetInstanceReviewBehaviors": 1,
        "mappedPdfRichMediaExecuteTargetInstanceReviewBehaviors": 1,
        "pdfWidgetAnnotationAppearanceActionBehaviors": 1,
        "mappedPdfWidgetAnnotationAppearanceActionBehaviors": 1,
        "pdfHybridXrefObjectStreamGenerationBehaviors": 1,
        "mappedPdfHybridXrefObjectStreamGenerationBehaviors": 1,
        "tableMergedCellGeometryBehaviors": 1,
        "mappedTableMergedCellGeometryBehaviors": 1,
        "pdfCidFontIndirectVerticalMetricBehaviors": 1,
        "mappedPdfCidFontIndirectVerticalMetricBehaviors": 1,
        "pdfXrefStreamPrevIndexWidthBoundaryBehaviors": 1,
        "mappedPdfXrefStreamPrevIndexWidthBoundaryBehaviors": 1,
        "pdfAcroFormXfaSignatureBoundaryBehaviors": 1,
        "mappedPdfAcroFormXfaSignatureBoundaryBehaviors": 1,
        "pdfDctDecodeCmykDecodePreviewBehaviors": 1,
        "mappedPdfDctDecodeCmykDecodePreviewBehaviors": 1,
        "pdfRichMediaEmbeddedFilespecReviewBehaviors": 1,
        "mappedPdfRichMediaEmbeddedFilespecReviewBehaviors": 1,
        "pdfAcroFormFieldHierarchyValueBoundaryBehaviors": 1,
        "mappedPdfAcroFormFieldHierarchyValueBoundaryBehaviors": 1,
        "pdfOutputIntentAssociatedFileBoundaryBehaviors": 1,
        "mappedPdfOutputIntentAssociatedFileBoundaryBehaviors": 1,
        "mappedPdfParserXrefOffsetOwnerBoundaryBehaviors": 1,
        "pdfParserXrefOffsetOwnerBoundaryBehaviors": 1,
        "wordpressScenarios": {
            "pdfPageLabelsIndirectOperandsBoundary": "Catalog /PageLabels entries with indirect /S, /P, and /St operands now produce matching native text-extraction and marker-app preview page-break metadata before Gutenberg paragraph rendering, without Python/models or external PDF tools.",
            "pdfParserXrefOffsetOwnerBoundary": "Stream-owned fake xref table payloads can no longer redirect WordPress PDF text extraction to stale page trees; xref table offsets inside another direct object body are rejected while current owner-boundary page text remains visible and no Python/models or external PDF tools execute.",
            "pdfIndexedDecodeSoftMaskColorSpaceBoundary": "A PDF image XObject using /Indexed /DeviceRGB without an explicit Decode emits the default [0 hival] decode plan, maps raw sample 3 to palette index 2, applies inverted soft-mask alpha, and clips an explicit out-of-range Decode to hival without raster execution.",
            "pdfPublicKeyDssPermissionReview": "Public-key crypt-filter recipient selection and DSS validation streams are exposed as review-only security metadata while encrypted text, raw CMS bytes, revocation checks, and trust-chain validation stay blocked.",
            "pdfXrefHybridDirectGenerationRepair": "Hybrid xref direct rows are repaired when the current page tree references a newer generation, preserving current page text and excluding stale generation-zero page/metadata objects before WordPress import.",
            "tableOcrMergedCellHeaderAxisReview": "Forced-OCR merged table headers carry row, column, or both-axis review metadata through supplied table conversion before Markdown formatting drops covered span cells.",
            "pdfAcroFormXfaAppearanceReview": "XFA-backed AcroForm rollover/down appearance states are reviewed as metadata without importing dynamic form payloads.",
            "pdfCurrentXrefMetadataOutputIntentAssociatedFileReview": "Current xref-selected catalog metadata preserves OutputIntent and associated FileSpec review metadata while excluding stale metadata roots and payload bytes.",
            "pdfObjectStreamInlineImageFilterRepair": "Object-stream filter helper operands are recovered before inline image tokenization so EI-looking image bytes stay inside image payload boundaries.",
            "tableGridBorderOcrConflictReview": "OCR table-line bboxes crossing detector grid borders preserve source-order review metadata without stale pdftext table-line promotion.",
            "pdfSecurityLaunchUriCertPermissionReview": "Launch and URI actions are summarized alongside certifying signature permissions while action targets stay review-only and out of visible text.",
            "pdfInlineJpxSmaskDecodeReview": "Inline JPX image payloads with EI-looking bytes and soft-mask Decode metadata are reviewed before RGB planning without raster execution.",
            "pdfOutlineOpenActionThreadPieceInfoReview": "Catalog OpenAction name-tree targets propagate target page, PieceInfo, and article-thread context into outline review rows without visible text leakage.",
            "pdfAcroFormWidgetRichTextActionResourceReview": "Widget rich text default style, action, and resource dictionaries are reviewed without importing active payloads or executing actions.",
            "markerBenchmarkOutputBoundary": "overall.py-style benchmark report output files and tabulate source rows are represented without executing upstream Python workers.",
            "pdfMetadataDssOutputIntentNameTreeReview": "Catalog DSS, OutputIntent, and name-tree metadata are composed as review-only rows without promoting validation bytes or payload streams.",
            "pdfPageStructParentsAssociatedThreadsReview": "Page StructParents ParentTree rows compose with associated files and article-thread metadata for WordPress page review.",
            "pdfIndexedSeparationSoftMaskReview": "Indexed Separation palette tints and soft-mask alpha are represented before RGB preview planning without raster execution.",
            "pdfCMapUseCMapVerticalWidthReview": "CMap dictionary UseCMap vertical writing mode is inherited before CIDFont W2 width grouping for WordPress text spacing.",
            "pdfParserStreamFilterXrefOwnerCurrentBase": "Indirect stream Filter references must match the current xref-selected generation before decoding, so stale helper generations are rejected while current direct streams stay visible.",
            "pdfSoftMaskIndexedJpxBoundaryCurrentBase": "Indexed JPX image streams remain preview-only while current-object soft-mask filter chains decode for review metadata before RGB planning.",
            "pdfSecurityDssActionByteRangeCurrentBase": "DSS-backed signed PDFs review post-signature Launch/URI actions outside signature ByteRanges without executing actions, validating signatures, or exposing validation payloads.",
            "pdfMetadataPieceInfoAssociatedOutputIntentCurrentBase": "Catalog associated FileSpec PieceInfo, attachment-local Metadata, and OutputIntents are preserved as provenance review rows while private payloads and stale xref duplicates stay out of document roots.",
            "pdfAcroFormWidgetAppearanceCharacteristicsCurrentBase": "AcroForm widget MK captions, colors, icons, icon-fit, and highlight modes are review metadata while field values remain authoritative for WordPress import.",
            "pdfPageStructParentsPieceInfoThreadCurrentBase": "Page StructParents ParentTree rows carry StructElem review metadata, associated FileSpec provenance, PieceInfo, and article-thread context without leaking review text or attachment payloads into visible content.",
            "pdfFontCidUseCMapWidthCurrentBase": "Object-valued Type0 UseCMap stream bases decode before derived CMap parsing so base CIDs feed descendant CIDFont width grouping.",
            "pdfParserInlineStreamOwnerCurrentBase": "Missing or stale stream owner recovery skips inline image BI/ID/EI payloads so fake endstream/endobj tokens do not truncate or leak page text.",
            "pdfSecurityByteRangeDssRevisionCurrentBase": "Signed PDFs now distinguish prior-revision and current-revision ByteRange coverage and correlate DSS VRI evidence with object placement as review-only security metadata, without signature validation, revocation checks, action execution, decryption, or raw payload exposure.",
            "pdfPageStructParentUserPropertyCurrentBase": "Tagged PDF page StructParents resolve through ParentTree rows so user properties, roles, MCIDs, and titles stay attached to page review metadata without leaking review-only text into visible WordPress paragraphs.",
            "tableOcrBorderConflictGridCurrentBase": "Forced-OCR table cells crossing detector grid borders keep assigned row/column ids and spanning render-cell links so WordPress table review can inspect border conflicts without stale pdftext promotion or Python/model execution.",
            "pdfParserFilterArrayDictOwnerCurrentBase": "PDF stream filter arrays that contain dictionaries now fail closed before current-base text extraction, preventing raw or malformed stream bytes from becoming WordPress paragraph text without Python/models/external PDF tools.",
            "pdfImageDeviceNTransferJpxBoundaryCurrentBase": "DeviceN and Separation image JPX streams remain review-only while tint metadata, transfer-function soft masks, and SMaskInData precedence are preserved before WordPress image preview planning without raster execution.",
            "pdfMetadataAssociatedFileSchemaCurrentBase": "PDF Portfolio and associated-file collection schema rows now propagate field priorities, descriptions, value dictionaries, checksums, and payload omission metadata into WordPress review rows without promoting attachment payload text.",
            "pdfFontCidSetIndirectWidthBoundaryCurrentBase": "CIDFont /FontDescriptor /CIDSet streams resolve through exact-generation indirect references before default-width grouping so stale same-number CIDSet data cannot introduce false WordPress text gaps.",
            "pdfXrefStreamPrevIndexGenerationCurrentBase": "Sparse xref-stream /Index rows with /Prev generation noise now preserve valid previous type-2 object-stream rows only when the carrier storage still matches current offsets before WordPress text extraction.",
            "markerRuntimeServerConfigErrorBoundaryCurrentBase": "marker_server.py-style startup config is represented as structured host/port/API-key/upload-directory/error planning metadata, with invalid ports and upload-directory failures returned before FastAPI/Uvicorn/Python/model execution.",
            "pdfPageAnnotationParentTreeWidgetCurrentBase": "Widget annotations now inherit parent-field StructParent metadata only when their OBJR points to the current page widget, preserving tagged annotation review context without leaking action or field payload text into WordPress paragraphs.",
            "tableHeaderGridRowspanCurrentBase": "Rowspanned first-column and grouped top-row headers now export column_header_rows, ids, scopes, and data-cell header references so WordPress table imports keep accessible header relationships after Markdown formatting.",
            "pdfSecurityCertPermissionOpenActionCurrentBase": "Certified PDFs now report catalog OpenAction chains alongside DocMDP/certificate permission review metadata while certificate bytes, signature bytes, JavaScript, Launch, URI actions, validation, decryption, and enforcement remain non-executing.",
            "pdfAcroFormResourceActionFileSpecCurrentBase": "AcroForm non-JavaScript action FileSpec dictionaries now expose embedded resource review metadata, Params, hashes, and omission boundaries without submitting/importing files, executing actions, or leaking payload bytes into WordPress content.",
            "pdfParserNameEscapeArrayBoundaryCurrentBase": "Escaped PDF names inside array operands now split at true array boundaries, preventing merged name tokens from bypassing optional-content and parser safety decisions before WordPress text extraction.",
            "pdfImageIndexedColorKeyTransferCurrentBase": "Indexed ColorKey masks now apply against raw palette indexes before Decode transfer while decoded preview rows keep alpha review metadata without raster execution.",
            "pdfFontType0CMapDescriptorWidthCurrentBase": "Type0 resource CMaps now feed descendant CIDFont descriptor widths and FontDescriptor flags before WordPress text-gap grouping, preserving current-base text layout without Python/pdftext execution.",
            "pdfFontType0Type3BroaderBehaviorCurrentBase": "Type0 direct-referenced DescendantFonts dictionaries and Type3 CMap CharProc d0/d1 widths now feed native text advance grouping before WordPress extraction without Python/pdftext execution.",
            "pdfOutlineStructureDestinationPageContextCurrentBase": "Outline structure state, style, color, destination page labels, transitions, and actions now enrich navigation review rows without promoting dictionary/action text into visible WordPress paragraphs.",
            "pdfXrefStreamPrevGenerationIndexCurrentBase": "Current xref-stream generation Index rows now preserve metadata imports while excluding stale /Prev duplicate rows before WordPress document metadata and text extraction."
        },
        "pdfIndexedDecodeSoftMaskColorSpaceBoundaryBehaviors": 1,
        "mappedPdfIndexedDecodeSoftMaskColorSpaceBoundaryBehaviors": 1,
        "pdfXrefObjectStreamFilterChainOperandBehaviors": 1,
        "mappedPdfXrefObjectStreamFilterChainOperandBehaviors": 1,
        "pdfXrefStreamObjectOwnerBoundaryBehaviors": 1,
        "mappedPdfXrefStreamObjectOwnerBoundaryBehaviors": 1,
        "tableOcrMergedCellGeometryBehaviors": 1,
        "mappedTableOcrMergedCellGeometryBehaviors": 1,
        "pdfFontCidWidthCMapResourceBehaviors": 1,
        "mappedPdfFontCidWidthCMapResourceBehaviors": 1,
        "pdfXrefStreamPrevHybridOwnerBehaviors": 1,
        "mappedPdfXrefStreamPrevHybridOwnerBehaviors": 1,
        "pdfDocumentSecurityStoreIndirectFilterBehaviors": 1,
        "mappedPdfDocumentSecurityStoreIndirectFilterBehaviors": 1,
        "pdfOutlineDestinationActionTransitionBehaviors": 1,
        "mappedPdfOutlineDestinationActionTransitionBehaviors": 1,
        "pdfPageAssociatedTransitionActionBehaviors": 1,
        "mappedPdfPageAssociatedTransitionActionBehaviors": 1,
        "tableMergedCellSpanStopBoundaryBehaviors": 1,
        "mappedTableMergedCellSpanStopBoundaryBehaviors": 1,
        "pdfIccDeviceNSoftMaskPreviewBehaviors": 1,
        "mappedPdfIccDeviceNSoftMaskPreviewBehaviors": 1,
        "pdfPortfolioAttachmentXmpOutputIntentReviewBehaviors": 1,
        "mappedPdfPortfolioAttachmentXmpOutputIntentReviewBehaviors": 1,
        "pdfAcroFormSignatureFieldActionStateBehaviors": 1,
        "mappedPdfAcroFormSignatureFieldActionStateBehaviors": 1,
        "pdfXrefStreamFilterDecodeParmsBehaviors": 1,
        "mappedPdfXrefStreamFilterDecodeParmsBehaviors": 1,
        "pdfEncryptionPermissionHandlerReviewBehaviors": 1,
        "mappedPdfEncryptionPermissionHandlerReviewBehaviors": 1,
        "pdfType0IndirectDefaultWidthBehaviors": 1,
        "mappedPdfType0IndirectDefaultWidthBehaviors": 1,
        "pdfXrefIncrementalFreeEntryGenerationBehaviors": 1,
        "mappedPdfXrefIncrementalFreeEntryGenerationBehaviors": 1,
        "tableHeaderSpanningGridReviewBehaviors": 1,
        "mappedTableHeaderSpanningGridReviewBehaviors": 1,
        "pdfAcroFormXfaSignatureWidgetReviewBehaviors": 1,
        "mappedPdfAcroFormXfaSignatureWidgetReviewBehaviors": 1,
        "pdfIccSoftMaskDecodeReviewBehaviors": 1,
        "mappedPdfIccSoftMaskDecodeReviewBehaviors": 1,
        "pdfNamedDestinationFitOperandReviewBehaviors": 1,
        "mappedPdfNamedDestinationFitOperandReviewBehaviors": 1,
        "pdfDestinationViewOperandNormalizationBehaviors": 1,
        "mappedPdfDestinationViewOperandNormalizationBehaviors": 1,
        "pdfParserStreamDecodeParmsOwnerBoundaryBehaviors": 1,
        "mappedPdfParserStreamDecodeParmsOwnerBoundaryBehaviors": 1,
        "pdfMetadataCollectionSchemaAssociatedReviewBehaviors": 1,
        "mappedPdfMetadataCollectionSchemaAssociatedReviewBehaviors": 1,
        "pdfCalibratedSoftMaskReviewBehaviors": 1,
        "mappedPdfCalibratedSoftMaskReviewBehaviors": 1,
        "pdfXrefIncrementalObjectStreamFreeRepairBehaviors": 1,
        "mappedPdfXrefIncrementalObjectStreamFreeRepairBehaviors": 1,
        "pdfObjectStreamNestedFilterBoundaryBehaviors": 1,
        "mappedPdfObjectStreamNestedFilterBoundaryBehaviors": 1,
        "pdfOutlineRemoteDestinationActionReviewBehaviors": 1,
        "mappedPdfOutlineRemoteDestinationActionReviewBehaviors": 1,
        "tableRotatedRowspanHeaderGridReviewBehaviors": 1,
        "mappedTableRotatedRowspanHeaderGridReviewBehaviors": 1,
        "pdfSecurityPermissionDigestCertifyReviewBehaviors": 1,
        "mappedPdfSecurityPermissionDigestCertifyReviewBehaviors": 1,
        "pdfAcroFormChoiceRichTextSubmitResetReviewBehaviors": 1,
        "mappedPdfAcroFormChoiceRichTextSubmitResetReviewBehaviors": 1,
        "pdfPageArticleThreadPieceInfoMcrReviewBehaviors": 1,
        "mappedPdfPageArticleThreadPieceInfoMcrReviewBehaviors": 1,
        "pdfMetadataOutputIntentLangAssociatedFileReviewBehaviors": 1,
        "mappedPdfMetadataOutputIntentLangAssociatedFileReviewBehaviors": 1,
        "pdfFontCidSetVerticalSurrogateWidthReviewBehaviors": 1,
        "mappedPdfFontCidSetVerticalSurrogateWidthReviewBehaviors": 1,
        "pdfOutlinePagePieceInfoTransitionThreadReviewBehaviors": 1,
        "mappedPdfOutlinePagePieceInfoTransitionThreadReviewBehaviors": 1,
        "pdfImageSoftMaskTransferIndexedReviewBehaviors": 1,
        "mappedPdfImageSoftMaskTransferIndexedReviewBehaviors": 1,
        "pdfPublicKeyDssPermissionReviewBehaviors": 1,
        "mappedPdfPublicKeyDssPermissionReviewBehaviors": 1,
        "pdfXrefHybridDirectGenerationRepairBehaviors": 1,
        "mappedPdfXrefHybridDirectGenerationRepairBehaviors": 1,
        "tableOcrMergedCellHeaderAxisReviewBehaviors": 1,
        "mappedTableOcrMergedCellHeaderAxisReviewBehaviors": 1,
        "pdfAcroFormXfaAppearanceReviewBehaviors": 1,
        "mappedPdfAcroFormXfaAppearanceReviewBehaviors": 1,
        "pdfCurrentXrefMetadataOutputIntentAssociatedFileReviewBehaviors": 1,
        "mappedPdfCurrentXrefMetadataOutputIntentAssociatedFileReviewBehaviors": 1,
        "pdfObjectStreamInlineImageFilterRepairBehaviors": 1,
        "mappedPdfObjectStreamInlineImageFilterRepairBehaviors": 1,
        "tableGridBorderOcrConflictReviewBehaviors": 1,
        "mappedTableGridBorderOcrConflictReviewBehaviors": 1,
        "pdfSecurityLaunchUriCertPermissionReviewBehaviors": 1,
        "mappedPdfSecurityLaunchUriCertPermissionReviewBehaviors": 1,
        "pdfInlineJpxSmaskDecodeReviewBehaviors": 1,
        "mappedPdfInlineJpxSmaskDecodeReviewBehaviors": 1,
        "pdfOutlineOpenActionThreadPieceInfoReviewBehaviors": 1,
        "mappedPdfOutlineOpenActionThreadPieceInfoReviewBehaviors": 1,
        "pdfAcroFormWidgetRichTextActionResourceReviewBehaviors": 1,
        "mappedPdfAcroFormWidgetRichTextActionResourceReviewBehaviors": 1,
        "markerBenchmarkOutputBoundaryBehaviors": 1,
        "mappedMarkerBenchmarkOutputBoundaryBehaviors": 1,
        "pdfMetadataDssOutputIntentNameTreeReviewBehaviors": 1,
        "mappedPdfMetadataDssOutputIntentNameTreeReviewBehaviors": 1,
        "pdfPageStructParentsAssociatedThreadsReviewBehaviors": 1,
        "mappedPdfPageStructParentsAssociatedThreadsReviewBehaviors": 1,
        "pdfIndexedSeparationSoftMaskReviewBehaviors": 1,
        "mappedPdfIndexedSeparationSoftMaskReviewBehaviors": 1,
        "pdfCMapUseCMapVerticalWidthReviewBehaviors": 1,
        "mappedPdfCMapUseCMapVerticalWidthReviewBehaviors": 1,
        "pdfParserStreamFilterXrefOwnerCurrentBaseBehaviors": 1,
        "mappedPdfParserStreamFilterXrefOwnerCurrentBaseBehaviors": 1,
        "pdfSoftMaskIndexedJpxBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfSoftMaskIndexedJpxBoundaryCurrentBaseBehaviors": 1,
        "pdfSecurityDssActionByteRangeCurrentBaseBehaviors": 1,
        "mappedPdfSecurityDssActionByteRangeCurrentBaseBehaviors": 1,
        "pdfMetadataPieceInfoAssociatedOutputIntentCurrentBaseBehaviors": 1,
        "mappedPdfMetadataPieceInfoAssociatedOutputIntentCurrentBaseBehaviors": 1,
        "pdfAcroFormWidgetAppearanceCharacteristicsCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormWidgetAppearanceCharacteristicsCurrentBaseBehaviors": 1,
        "pdfPageStructParentsPieceInfoThreadCurrentBaseBehaviors": 1,
        "mappedPdfPageStructParentsPieceInfoThreadCurrentBaseBehaviors": 1,
        "pdfFontCidUseCMapWidthCurrentBaseBehaviors": 1,
        "mappedPdfFontCidUseCMapWidthCurrentBaseBehaviors": 1,
        "pdfParserInlineStreamOwnerCurrentBaseBehaviors": 1,
        "mappedPdfParserInlineStreamOwnerCurrentBaseBehaviors": 1,
        "mappedPdfSecurityByteRangeDssRevisionCurrentBaseBehaviors": 1,
        "pdfSecurityByteRangeDssRevisionCurrentBaseBehaviors": 1,
        "mappedPdfPageStructParentUserPropertyCurrentBaseBehaviors": 1,
        "pdfPageStructParentUserPropertyCurrentBaseBehaviors": 1,
        "mappedTableOcrBorderConflictGridCurrentBaseBehaviors": 1,
        "tableOcrBorderConflictGridCurrentBaseBehaviors": 1,
        "pdfParserFilterArrayDictOwnerCurrentBaseBehaviors": 1,
        "mappedPdfParserFilterArrayDictOwnerCurrentBaseBehaviors": 1,
        "pdfImageDeviceNTransferJpxBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfImageDeviceNTransferJpxBoundaryCurrentBaseBehaviors": 1,
        "pdfMetadataAssociatedFileSchemaCurrentBaseBehaviors": 1,
        "mappedPdfMetadataAssociatedFileSchemaCurrentBaseBehaviors": 1,
        "pdfFontCidSetIndirectWidthBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfFontCidSetIndirectWidthBoundaryCurrentBaseBehaviors": 1,
        "pdfXrefStreamPrevIndexGenerationCurrentBaseBehaviors": 1,
        "mappedPdfXrefStreamPrevIndexGenerationCurrentBaseBehaviors": 1,
        "markerRuntimeServerConfigErrorBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeServerConfigErrorBoundaryCurrentBaseBehaviors": 1,
        "pdfPageAnnotationParentTreeWidgetCurrentBaseBehaviors": 1,
        "mappedPdfPageAnnotationParentTreeWidgetCurrentBaseBehaviors": 1,
        "tableHeaderGridRowspanCurrentBaseBehaviors": 1,
        "mappedTableHeaderGridRowspanCurrentBaseBehaviors": 1,
        "pdfSecurityCertPermissionOpenActionCurrentBaseBehaviors": 1,
        "mappedPdfSecurityCertPermissionOpenActionCurrentBaseBehaviors": 1,
        "pdfAcroFormResourceActionFileSpecCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormResourceActionFileSpecCurrentBaseBehaviors": 1,
        "pdfParserNameEscapeArrayBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfParserNameEscapeArrayBoundaryCurrentBaseBehaviors": 1,
        "pdfImageIndexedColorKeyTransferCurrentBaseBehaviors": 1,
        "mappedPdfImageIndexedColorKeyTransferCurrentBaseBehaviors": 1,
        "pdfFontType0CMapDescriptorWidthCurrentBaseBehaviors": 1,
        "mappedPdfFontType0CMapDescriptorWidthCurrentBaseBehaviors": 1,
        "pdfFontType0Type3BroaderBehaviorCurrentBaseBehaviors": 2,
        "mappedPdfFontType0Type3BroaderBehaviorCurrentBaseBehaviors": 2,
        "pdfOutlineStructureDestinationPageContextCurrentBaseBehaviors": 1,
        "mappedPdfOutlineStructureDestinationPageContextCurrentBaseBehaviors": 1,
        "pdfXrefStreamPrevGenerationIndexCurrentBaseBehaviors": 1,
        "mappedPdfXrefStreamPrevGenerationIndexCurrentBaseBehaviors": 1,
        "pdfMetadataPdfaCatalogAssociatedOutlineCurrentBaseBehaviors": 1,
        "mappedPdfMetadataPdfaCatalogAssociatedOutlineCurrentBaseBehaviors": 1,
        "pdfImageInlineJpxColorKeyOutputPreviewCurrentBaseBehaviors": 1,
        "mappedPdfImageInlineJpxColorKeyOutputPreviewCurrentBaseBehaviors": 1,
        "tableOcrHeaderGridCaptionCellspanCurrentBaseBehaviors": 1,
        "mappedTableOcrHeaderGridCaptionCellspanCurrentBaseBehaviors": 1,
        "pdfFontCidType3ToUnicodeSpacingWidthCurrentBaseBehaviors": 1,
        "mappedPdfFontCidType3ToUnicodeSpacingWidthCurrentBaseBehaviors": 1,
        "pdfParserStreamDictionaryXrefOwnerRecoveryCurrentBaseBehaviors": 1,
        "mappedPdfParserStreamDictionaryXrefOwnerRecoveryCurrentBaseBehaviors": 1,
        "markerRuntimeConvertServerOutputPaginationBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeConvertServerOutputPaginationBoundaryCurrentBaseBehaviors": 1,
        "pdfOutlineDestinationActionPageLabelStructureCurrentBaseBehaviors": 1,
        "mappedPdfOutlineDestinationActionPageLabelStructureCurrentBaseBehaviors": 1,
        "pdfPageAnnotationStructTreeAssociatedTransitionCurrentBaseBehaviors": 1,
        "mappedPdfPageAnnotationStructTreeAssociatedTransitionCurrentBaseBehaviors": 1,
        "pdfXrefStreamPrevHybridGenerationRecoveryCurrentBaseBehaviors": 1,
        "mappedPdfXrefStreamPrevHybridGenerationRecoveryCurrentBaseBehaviors": 1
    },
    "nativeImplementation": "current-base native PHP markerPDF behavior includes page-level /Contents partial-extraction diagnostics, qpdf-derived AES-256 revision-6 encrypted permission fixture review, password-handling diagnostics, encrypted crypt-filter content-role preflight, Image XObject CTM placement review, indirect encrypted permission operand review, XMP/current-trailer metadata, inline-image decode boundaries, xref repair, generation-exact named destinations, attachment review, marker-app preview PageLabels /Limits and indirect label-operand enforcement, and optional pdftext dictionary_output(sort=true) row/column block ordering; full upstream Python/model runner parity remains unavailable under the no-GPU scope; runtime batch preflight records output-folder regular-file conflicts before metadata/model/pool planning and convert.py stdout conversion-summary ordering before task tuple construction or pool launch",
    "wordpressScenario": "WordPress PDF imports now cover qpdf-derived AES-256 revision-6 encrypted permission fixtures that stop at encrypted preflight with password-readiness diagnostics, encrypted Standard crypt-filter content-role preflight for StmF/StrF/EFF identity, encrypted, and missing filters before import decisions, Image XObject q/Q/cm and nested Form XObject /Matrix placement bboxes for review-only media handoff, indirect encrypted permission and crypt-filter review, catalog XMP plus current-trailer Info/ID metadata, inline ASCII85/Flate image decode boundaries, classic xref rebuild, xref /Prev repair, generation-exact native named-destination extraction, attachment/FileSpec summaries, marker-app preview PageLabels /Limits and indirect /S /P /St operand boundaries, pdftext dictionary_output span-text normalization and optional sort=true row/column ordering, current incremental /Prev xref content selection, single-document runtime admission review, Type3/font-width boundaries, page-resource inheritance, clipped table grid geometry, malformed BI tokenizer recovery, pdftext keep_chars=false sanitation, indirect xref-stream W/Index/Size helper resolution, and the previously accepted native no-GPU markerPDF paths while keeping model execution, raw payload bytes, action/XFA operands, image payloads, encrypted associated-file text, stale xref data, mismatched-generation destination rows, and parser noise out of visible content.",
    "nextTask": "Continue native no-GPU markerPDF/Pandoc supervision from clean origin/main worktrees, integrating ready handoffs serially only after focused verification and preserving the bounded worker swarm.",
    "latestRunAddendum": "2026-06-20 UTC refinery verification for plib-tuzwg.14 image-only OCR handoff on current main: focused supplied-document gate 1 file / 848 assertions / 0 failures; adjacent OCR/converter-language gate 5 files / 931 assertions / 0 failures; Pandoc registry smoke 2 files / 2585 assertions / 0 failures; php -l, JSON, whitespace, and conflict-marker checks passed. No live OCR/model runtime, multiprocessing, Python/PDFium/PIL, or external PDF tools were invoked.",
    "wordpressScenarios": 1097,
    "inventory": {
        "mappedInlineImageReviewBehaviors": 1,
        "mappedMultilineOcrTableHeaderBehaviors": 1,
        "mappedPageAssociatedChecksumBehaviors": 1,
        "mappedPublicKeyRecipientPermissionBehaviors": 1,
        "mappedPdfEncryptedPermissionCryptFilterPreflightCurrentBaseBehaviors": 1,
        "mappedSimpleFontIndirectEncodingWidthBehaviors": 1,
        "mappedSimpleFontAverageWidthAdvanceBehaviors": 1,
        "mappedTrailerEncryptIdPrecedenceBehaviors": 1,
        "mappedXrefZeroWidthMemberIndexBehaviors": 1,
        "mappedAcroFormWidgetActionCycleBehaviors": 1,
        "mappedArticleThreadNavigationBehaviors": 1,
        "mappedStructureTreeLanguageReviewBehaviors": 1,
        "mappedPdfStructTreeAssociatedFilesPageReviewBehaviors": 1,
        "mappedPdfType3CharProcDescriptorMissingWidthBehaviors": 1,
        "mappedPdfXrefPrevObjectStreamGenerationBehaviors": 1,
        "mappedPdfPageStructParentsResourcesTransitionLabelBehaviors": 1,
        "mappedPdfDeviceNIccSoftMaskStreamPreviewBehaviors": 1,
        "mappedPdfOutlineNamedActionPageReviewBehaviors": 1,
        "mappedPdfParserIndirectFilterDecodeParmsOwnerBehaviors": 1,
        "mappedPdfImageSoftMaskColorKeyIccConflictBehaviors": 1,
        "mappedPdfPublicKeyDssPermissionReviewBehaviors": 1,
        "mappedPdfXrefHybridDirectGenerationRepairBehaviors": 1,
        "mappedTableOcrMergedCellHeaderAxisReviewBehaviors": 1,
        "mappedPdfAcroFormXfaAppearanceReviewBehaviors": 1,
        "mappedPdfCurrentXrefMetadataOutputIntentAssociatedFileReviewBehaviors": 1,
        "mappedPdfObjectStreamInlineImageFilterRepairBehaviors": 1,
        "mappedTableGridBorderOcrConflictReviewBehaviors": 1,
        "mappedPdfSecurityLaunchUriCertPermissionReviewBehaviors": 1,
        "mappedPdfInlineJpxSmaskDecodeReviewBehaviors": 1,
        "mappedPdfOutlineOpenActionThreadPieceInfoReviewBehaviors": 1,
        "mappedPdfAcroFormWidgetRichTextActionResourceReviewBehaviors": 1,
        "mappedMarkerBenchmarkOutputBoundaryBehaviors": 1,
        "mappedPdfMetadataDssOutputIntentNameTreeReviewBehaviors": 1,
        "mappedPdfPageStructParentsAssociatedThreadsReviewBehaviors": 1,
        "mappedPdfIndexedSeparationSoftMaskReviewBehaviors": 1,
        "mappedPdfCMapUseCMapVerticalWidthReviewBehaviors": 1,
        "mappedPdfParserStreamFilterXrefOwnerCurrentBaseBehaviors": 1,
        "mappedPdfSoftMaskIndexedJpxBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfSecurityDssActionByteRangeCurrentBaseBehaviors": 1,
        "mappedPdfMetadataPieceInfoAssociatedOutputIntentCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormWidgetAppearanceCharacteristicsCurrentBaseBehaviors": 1,
        "mappedPdfPageStructParentsPieceInfoThreadCurrentBaseBehaviors": 1,
        "mappedPdfFontCidUseCMapWidthCurrentBaseBehaviors": 1,
        "mappedPdfParserInlineStreamOwnerCurrentBaseBehaviors": 1,
        "mappedPdfInlineImageTokenizerBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerBenchmarkRuntimeApiCurrentBaseBehaviors": 1,
        "mappedPdfOutlineDestinationActionContextCurrentBaseBehaviors": 1,
        "mappedPdfXrefCurrentBaseRepairBoundaryBehaviors": 1,
        "mappedPdfSecurityAcroFormPermissionActionCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormXfaWidgetCurrentBaseBehaviors": 1,
        "mappedTableOcrPolygonGeometryCurrentBaseBehaviors": 1,
        "mappedPdfMetadataCatalogNameTreeCurrentBaseBehaviors": 1,
        "mappedPdfImageNamedColorSpaceSmaskCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeConversionBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeSinglePreflightBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormActionBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfFontCidEncodingWidthCurrentBaseBehaviors": 1,
        "mappedPdfImageInlineMaskPreviewCurrentBaseBehaviors": 1,
        "mappedPdfMetadataAssociatedRelatedFilesCurrentBaseBehaviors": 1,
        "mappedPdfOutlineNameTreeLimitsCurrentBaseBehaviors": 1,
        "mappedPdfPageAnnotationThreadCurrentBaseBehaviors": 1,
        "mappedPdfParserXrefStreamBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfSecurityDssSignatureCurrentBaseBehaviors": 1,
        "mappedPdfXrefObjectStreamDuplicateZeroWidthCurrentBaseBehaviors": 1,
        "mappedTableGridBorderAssignedReviewCurrentBaseBehaviors": 1,
        "mappedMarkerBenchmarkScoreVerifierCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormSeedLockActionsCurrentBaseBehaviors": 1,
        "mappedPdfImageDeviceNTransferMaskCurrentBaseBehaviors": 1,
        "mappedPdfPageWidgetLinkCurrentBaseBehaviors": 1,
        "mappedPdfParserXrefObjectStreamFilterCurrentBaseBehaviors": 1,
        "mappedPdfPortfolioPieceInfoCurrentBaseBehaviors": 1,
        "mappedPdfSecurityPublicKeyPermissionCurrentBaseBehaviors": 1,
        "mappedPdfFontType3CidSetCMapCurrentBaseBehaviors": 1,
        "mappedPdfXrefHybridGenerationRepairCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeConvertBenchmarkErrorsCurrentBaseBehaviors": 1,
        "mappedPdfImageColorKeySoftMaskJpxCurrentBaseBehaviors": 1,
        "mappedPdfSecurityEncryptSignatureByteRangeCurrentBaseBehaviors": 1,
        "mappedPdfXrefObjectStreamGenerationPrevCurrentBaseBehaviors": 1,
        "mappedPdfFontCMapBfrangeSurrogateWidthCurrentBaseBehaviors": 1,
        "mappedPdfOutlineDestinationFitActionChainCurrentBaseBehaviors": 1,
        "mappedPdfParserStreamLengthStartxrefRecoveryCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormWidgetAppearanceStateCurrentBaseBehaviors": 1,
        "mappedPdfMetadataPieceInfoAssociatedXmpCurrentBaseBehaviors": 1,
        "mappedPdfPageStructParentsThreadMarkupCurrentBaseBehaviors": 1,
        "mappedTableSpansOcrGridGeometryCurrentBaseBehaviors": 1,
        "pdfCalibratedJbig2SoftMaskCurrentBaseBehaviors": 1,
        "mappedPdfCalibratedJbig2SoftMaskCurrentBaseBehaviors": 1,
        "markerServerConvertErrorBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerServerConvertErrorBoundaryCurrentBaseBehaviors": 1,
        "pdfXrefTrailerEncryptPrevCurrentBaseBehaviors": 1,
        "mappedPdfXrefTrailerEncryptPrevCurrentBaseBehaviors": 1,
        "pdfType0VerticalUseCMapCidSetCurrentBaseBehaviors": 1,
        "mappedPdfType0VerticalUseCMapCidSetCurrentBaseBehaviors": 1,
        "pdfAcroFormRichTextXfaActionStateCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormRichTextXfaActionStateCurrentBaseBehaviors": 1,
        "pdfOutlineRemoteGoToETransitionCurrentBaseBehaviors": 1,
        "mappedPdfOutlineRemoteGoToETransitionCurrentBaseBehaviors": 1,
        "pdfParserXrefStreamObjectOwnerCycleCurrentBaseBehaviors": 1,
        "mappedPdfParserXrefStreamObjectOwnerCycleCurrentBaseBehaviors": 1,
        "tableOcrMergedHeaderGridCurrentBaseBehaviors": 1,
        "mappedTableOcrMergedHeaderGridCurrentBaseBehaviors": 1,
        "pdfPageAnnotationStructParentAssociatedCurrentBaseBehaviors": 1,
        "mappedPdfPageAnnotationStructParentAssociatedCurrentBaseBehaviors": 1,
        "markerRuntimeServerRemotePollingErrorCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeServerRemotePollingErrorCurrentBaseBehaviors": 1,
        "pdfSecurityDssCertActionPermissionCurrentBaseBehaviors": 1,
        "mappedPdfSecurityDssCertActionPermissionCurrentBaseBehaviors": 1,
        "pdfMetadataNameTreePieceInfoOutputIntentCurrentBaseBehaviors": 1,
        "mappedPdfMetadataNameTreePieceInfoOutputIntentCurrentBaseBehaviors": 1,
        "pdfImageDeviceGraySmaskTransferCurrentBaseBehaviors": 1,
        "mappedPdfImageDeviceGraySmaskTransferCurrentBaseBehaviors": 1,
        "pdfFontType0CidSetDescriptorDefaultCurrentBaseBehaviors": 1,
        "mappedPdfFontType0CidSetDescriptorDefaultCurrentBaseBehaviors": 1,
        "pdfOutlineLaunchThreadTransitionContextCurrentBaseBehaviors": 1,
        "mappedPdfOutlineLaunchThreadTransitionContextCurrentBaseBehaviors": 1,
        "markerRuntimeBenchmarkCallbackSandboxCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeBenchmarkCallbackSandboxCurrentBaseBehaviors": 1,
        "pdfAcroFormSubmitResetRichTextResourceCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormSubmitResetRichTextResourceCurrentBaseBehaviors": 1,
        "pdfParserStreamTokenBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfParserStreamTokenBoundaryCurrentBaseBehaviors": 1,
        "pdfPageArtifactMarkedContentClipCurrentBaseBehaviors": 1,
        "mappedPdfPageArtifactMarkedContentClipCurrentBaseBehaviors": 1,
        "pdfMetadataPortfolioPieceInfoOutputIntentCurrentBaseBehaviors": 1,
        "mappedPdfMetadataPortfolioPieceInfoOutputIntentCurrentBaseBehaviors": 1,
        "pdfImageIndexedDeviceNSoftMaskTransferCurrentBaseBehaviors": 1,
        "mappedPdfImageIndexedDeviceNSoftMaskTransferCurrentBaseBehaviors": 1,
        "pdfXrefPrevObjectStreamGenerationCurrentBaseBehaviors": 1,
        "mappedPdfXrefPrevObjectStreamGenerationCurrentBaseBehaviors": 1,
        "markerRuntimeAppConfigBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeAppConfigBoundaryCurrentBaseBehaviors": 1,
        "pdfOutlineRemoteThreadActionStackCurrentBaseBehaviors": 1,
        "mappedPdfOutlineRemoteThreadActionStackCurrentBaseBehaviors": 1,
        "pdfMetadataTrailerIdLangViewerPreferenceCurrentBaseBehaviors": 1,
        "mappedPdfMetadataTrailerIdLangViewerPreferenceCurrentBaseBehaviors": 1,
        "pdfXrefLinearizedPrevHintStartxrefCurrentBaseBehaviors": 1,
        "mappedPdfXrefLinearizedPrevHintStartxrefCurrentBaseBehaviors": 1,
        "pdfRichMediaScreenSelectorRenditionCurrentBaseBehaviors": 1,
        "mappedPdfRichMediaScreenSelectorRenditionCurrentBaseBehaviors": 1,
        "pdfAnnotationActionReferenceCoverageCurrentBaseBehaviors": 2,
        "mappedPdfAnnotationActionReferenceCoverageCurrentBaseBehaviors": 2,
        "suppliedImageOnlyOcrHandoffCurrentBaseBehaviors": 1,
        "mappedSuppliedImageOnlyOcrHandoffCurrentBaseBehaviors": 1,
        "pdfImageDeviceNSeparationSmaskDecodeCurrentBaseBehaviors": 1,
        "mappedPdfImageDeviceNSeparationSmaskDecodeCurrentBaseBehaviors": 1,
        "pdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBaseBehaviors": 1,
        "mappedPdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBaseBehaviors": 1,
        "tableRotatedHeaderAccessibilityGridCurrentBaseBehaviors": 1,
        "mappedTableRotatedHeaderAccessibilityGridCurrentBaseBehaviors": 1,
        "pdfFontType3ColorGlyphResourceWidthCurrentBaseBehaviors": 1,
        "mappedPdfFontType3ColorGlyphResourceWidthCurrentBaseBehaviors": 1,
        "pdfFontType3CharProcsGenerationBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfFontType3CharProcsGenerationBoundaryCurrentBaseBehaviors": 1,
        "layoutPageHeaderFooterRotatedColumnsCurrentBaseBehaviors": 1,
        "mappedLayoutPageHeaderFooterRotatedColumnsCurrentBaseBehaviors": 1,
        "pdfParserObjectStreamGenerationOffsetOwnerCurrentBaseBehaviors": 1,
        "mappedPdfParserObjectStreamGenerationOffsetOwnerCurrentBaseBehaviors": 1,
        "pdfMetadataXmpLangMarkInfoCatalogCurrentBaseBehaviors": 1,
        "mappedPdfMetadataXmpLangMarkInfoCatalogCurrentBaseBehaviors": 1,
        "pdfOutlineDirectNamedThreadActionCurrentBaseBehaviors": 1,
        "mappedPdfOutlineDirectNamedThreadActionCurrentBaseBehaviors": 1,
        "pdfXrefObjectStreamPrevGenerationRebuildCurrentBaseBehaviors": 1,
        "mappedPdfXrefObjectStreamPrevGenerationRebuildCurrentBaseBehaviors": 1,
        "pdfPageAssociatedFilesMarkedContentAltCurrentBaseBehaviors": 1,
        "mappedPdfPageAssociatedFilesMarkedContentAltCurrentBaseBehaviors": 1,
        "markerRuntimeServerUploadPaginationErrorBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeServerUploadPaginationErrorBoundaryCurrentBaseBehaviors": 1,
        "markerRuntimePreviewArtifactBoundaryCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimePreviewArtifactBoundaryCurrentBaseBehaviors": 1,
        "pdfAttachmentFileSpecAFRelationshipChecksumCurrentBaseBehaviors": 1,
        "mappedPdfAttachmentFileSpecAFRelationshipChecksumCurrentBaseBehaviors": 1,
        "pdfFontSimpleType3CMapSpacingCurrentBaseBehaviors": 1,
        "mappedPdfFontSimpleType3CMapSpacingCurrentBaseBehaviors": 1,
        "pdfFontType0Type3BroaderBehaviorCurrentBaseBehaviors": 2,
        "mappedPdfFontType0Type3BroaderBehaviorCurrentBaseBehaviors": 2,
        "tableOcrStructureAssignmentRegressionCurrentBaseBehaviors": 1,
        "mappedTableOcrStructureAssignmentRegressionCurrentBaseBehaviors": 1,
        "markerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBaseBehaviors": 1,
        "pdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBaseBehaviors": 1,
        "pdfOutlineNamedDestinationTransitionThreadSecurityCurrentBaseBehaviors": 1,
        "mappedPdfOutlineNamedDestinationTransitionThreadSecurityCurrentBaseBehaviors": 1,
        "pdfPageStructTreeMarkedContentAssociatedFilesCurrentBaseBehaviors": 1,
        "mappedPdfPageStructTreeMarkedContentAssociatedFilesCurrentBaseBehaviors": 1,
        "pdfParserSecurityXrefFilterErrorBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfParserSecurityXrefFilterErrorBoundaryCurrentBaseBehaviors": 1,
        "pdfXrefHybridLinearizedObjectStreamGenerationCurrentBaseBehaviors": 1,
        "mappedPdfXrefHybridLinearizedObjectStreamGenerationCurrentBaseBehaviors": 1,
        "mappedPdfSecurityAcroFormDssActionAttachmentBundleCurrentBaseBehaviors": 1,
        "mappedPdfParserXrefStreamCompressedOperandOwnerCurrentBaseBehaviors": 1,
        "mappedPdfImageColorSpaceMaskInlineOutputPreviewCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormWidgetXfaActionAppearanceValueCurrentBaseBehaviors": 1,
        "mappedPdfFontCidWidthResourceSpacingCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeServerUploadBenchmarkErrorCurrentBaseBehaviors": 1,
        "mappedOutputMarkdownImageArtifactQualityCurrentBaseBehaviors": 1,
        "mappedPdfXrefObjectStreamHybridGenerationOwnerCurrentBaseBehaviors": 1,
        "mappedTableOcrBenchmarkSpanGridQualityCurrentBaseBehaviors": 1,
        "mappedPdfOutlineDestinationThreadActionMetadataCurrentBaseBehaviors": 1,
        "mappedTableOcrRotatedHeaderCaptionCurrentBaseBehaviors": 1,
        "mappedPdfXrefStreamPrevIndexWidthRepairCurrentBaseBehaviors": 1,
        "mappedPdfSecurityPermissionDssActionChainCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeConvertServerUploadPaginationCurrentBaseBehaviors": 1,
        "mappedPdfFontType0CidSetVerticalSpacingCurrentBaseBehaviors": 1,
        "mappedPdfAcroFormWidgetAppearanceExportCurrentBaseBehaviors": 1,
        "mappedPdfMetadataXmpNameTreeAssociatedSchemaCurrentBaseBehaviors": 1,
        "mappedPdfImageColorSpaceSmaskJpeg2000OutputCurrentBaseBehaviors": 1,
        "mappedPdfPageAnnotationAssociatedStructTreeReviewCurrentBaseBehaviors": 1,
        "mappedPdfParserInlineStreamJpxCMapRepairCurrentBaseBehaviors": 1,
        "markerRuntimeEncryptedPdfPreflightShortCircuitCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeEncryptedPdfPreflightShortCircuitCurrentBaseBehaviors": 1,
        "pdfAttachmentEmbeddedFilesAnnotationSummaryCurrentBaseBehaviors": 1,
        "mappedPdfAttachmentEmbeddedFilesAnnotationSummaryCurrentBaseBehaviors": 1,
        "pdfAttachmentPageAssociatedFilePreflightCurrentBaseBehaviors": 1,
        "mappedPdfAttachmentPageAssociatedFilePreflightCurrentBaseBehaviors": 1,
        "mappedPdfTextDictionaryPostprocessCurrentBaseBehaviors": 1,
        "mappedPdfTextDictionarySortBoundaryCurrentBaseBehaviors": 1,
        "mappedPdfNamedDestinationExtractorCurrentBaseBehaviors": 3,
        "mappedMarkerAppPreviewPageLabelsCurrentBaseBehaviors": 5,
        "mappedPdfXrefPrevIncrementalContentEvidenceCurrentBaseBehaviors": 1,
        "mappedMarkerRuntimeMainPreflightBoundaryCurrentBaseBehaviors": 3,
        "mappedPdfPagePartialExtractionDiagnosticsCurrentBaseBehaviors": 2
    },
    "pdfParserNameArrayCommentBoundaryCurrentBase": "PDF comments inside optional-content arrays are skipped before indirect-reference and escaped-name scanning, preventing comment-only hidden layer references from affecting WordPress text extraction.",
    "pdfParserNameArrayCommentBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfParserNameArrayCommentBoundaryCurrentBaseBehaviors": 1,
    "pdfFontType3CharProcToUnicodeCurrentBase": "Type3 fonts without /ToUnicode recover Unicode from standard /CharProcs glyph names selected through an Encoding CMap while excluding CharProc drawing payload text.",
    "pdfFontType3CharProcToUnicodeCurrentBaseBehaviors": 1,
    "mappedPdfFontType3CharProcToUnicodeCurrentBaseBehaviors": 1,
    "pdfFontType3CharProcsGenerationBoundaryCurrentBase": "Type3 /CharProcs entries preserve exact indirect object generations before d0/d1 glyph-width extraction, so same-number CharProc streams can drive different WordPress text grouping boundaries without exposing CharProc payload text.",
    "pdfFontType3CharProcsGenerationBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfFontType3CharProcsGenerationBoundaryCurrentBaseBehaviors": 1,
    "tableSpanGridSectionCaptionCurrentBase": "Supplied OCR span-grid tables carry nearest section heading and caption context into table review metadata while preserving colspan, rowspan, header ids, and stale pdftext exclusion for WordPress rendering.",
    "tableSpanGridSectionCaptionCurrentBaseBehaviors": 1,
    "mappedTableSpanGridSectionCaptionCurrentBaseBehaviors": 1,
    "pdfPageThreadStructTreeAssociatedFileCurrentBase": "Page article-thread bead rows carry target StructTree MCIDs, roles, marked-content rows, and associated FileSpec provenance as review metadata without leaking attachment payload bytes.",
    "pdfPageThreadStructTreeAssociatedFileCurrentBaseBehaviors": 1,
    "mappedPdfPageThreadStructTreeAssociatedFileCurrentBaseBehaviors": 1,
    "pdfSecurityPermissionByteRangeFieldMdpCurrentBase": "FieldMDP target field objects and widgets are correlated with signature ByteRange signed-revision coverage before WordPress import, keeping unsigned current-revision field values and signature bytes out of visible text and security JSON.",
    "pdfSecurityPermissionByteRangeFieldMdpCurrentBaseBehaviors": 1,
    "mappedPdfSecurityPermissionByteRangeFieldMdpCurrentBaseBehaviors": 1,
    "pdfImageJpeg2000MaskDecodePreviewCurrentBase": "JPEG2000 /ImageMask Decode preview rows are derived from supplied one-bit JPX mask samples while raw JPX raster payloads remain review-only.",
    "pdfImageJpeg2000MaskDecodePreviewCurrentBaseBehaviors": 3,
    "markerRuntimeConversionReportMemoryErrorCurrentBase": "Benchmark conversion reports preserve fail-soft memory snapshot dump failures while continuing score, report, and Markdown output without CUDA/model execution.",
    "markerRuntimeConversionReportMemoryErrorCurrentBaseBehaviors": 1,
    "pdfAcroFormFieldActionSubmitResetResourceCurrentBase": "Field-level AcroForm SubmitForm and ResetForm action resource reviews summarize selected fields, default appearance resources, no-export omission, reset defaults, and FileSpec targets without executing actions or exposing payload text.",
    "pdfAcroFormFieldActionSubmitResetResourceCurrentBaseBehaviors": 1,
    "pdfMetadataPdfaAssociatedNameTreeCurrentBase": "Catalog /Names /EmbeddedFiles rows are summarized as PDF/A associated-file review metadata when a root PDF/A OutputIntent is present, preserving root and attachment-local OutputIntent provenance while omitting embedded payloads from visible WordPress text.",
    "pdfMetadataPdfaAssociatedNameTreeCurrentBaseBehaviors": 1,
    "mappedPdfMetadataPdfaAssociatedNameTreeCurrentBaseBehaviors": 1,
    "pdfParserObjectStreamStreamDictGenerationCurrentBase": "Object-stream dictionaries expose review metadata proving indirect /N, /First, /Length, /Filter, and /DecodeParms helpers are resolved through current xref-selected generations before WordPress paragraph extraction.",
    "pdfParserObjectStreamStreamDictGenerationCurrentBaseBehaviors": 1,
    "mappedPdfParserObjectStreamStreamDictGenerationCurrentBaseBehaviors": 1,
    "pdfXrefPrevHybridSizeGenerationCurrentBase": "Hybrid xref table /Prev chains with underdeclared xref-stream Size rows preserve trailer Root generation references before WordPress paragraph rendering, excluding stale same-object generation-zero catalog text.",
    "pdfXrefPrevHybridSizeGenerationCurrentBaseBehaviors": 1,
    "mappedPdfXrefPrevHybridSizeGenerationCurrentBaseBehaviors": 1,
    "pdfXrefStreamGenerationIndexRepairCurrentBase": "Malformed sparse xref-stream /Index rows whose explicit type-1 offsets point at current direct-object headers are repaired to those object numbers before older /Prev rows merge, preserving current generation-one content and excluding stale WordPress paragraph text.",
    "pdfXrefStreamGenerationIndexRepairCurrentBaseBehaviors": 1,
    "mappedPdfXrefStreamGenerationIndexRepairCurrentBaseBehaviors": 1,
    "pdfPageFormXObjectStructTreeClipCurrentBase": "Page clipping paths remain active when invoked Form XObject marked-content MCIDs are replayed through StructTree ordering, preventing clipped glyphs and clipped ActualText replacements from leaking into WordPress paragraphs.",
    "pdfPageFormXObjectStructTreeClipCurrentBaseBehaviors": 1,
    "mappedPdfPageFormXObjectStructTreeClipCurrentBaseBehaviors": 1,
    "pdfParserCommentArrayDictStringTokenCurrentBase": "PDF comments inside array, dictionary, destination-reference, and indirect string-token boundaries are treated as whitespace before current-base WordPress text extraction, while percent signs inside literal strings remain visible content.",
    "pdfParserCommentArrayDictStringTokenCurrentBaseBehaviors": 1,
    "mappedPdfParserCommentArrayDictStringTokenCurrentBaseBehaviors": 1,
    "pdfLinkAnnotationTargetContextCurrentBase": "Local link annotation destination reviews combine XMP date normalization, matching outline titles, page labels, and target page transition/action rows without executing actions or exposing XMP/action operands as visible text.",
    "pdfLinkAnnotationTargetContextCurrentBaseBehaviors": 1,
    "pdfXrefHybridObjectStreamFreeOwnerCurrentBase": "Hybrid xref table free rows are authoritative over companion xref-stream type-2 rows for the same object number, keeping suppressed object-stream members review-only and out of visible WordPress text.",
    "pdfXrefHybridObjectStreamFreeOwnerCurrentBaseBehaviors": 1,
    "pdfOutlineStructureDestinationActionContextCurrentBase": "Structured outline rows expose action-backed destination context and target page review metadata, including action chains and target page labels/transitions, without executing PDF actions or leaking action operands to visible WordPress text.",
    "pdfOutlineStructureDestinationActionContextCurrentBaseBehaviors": 1,
    "mappedPdfOutlineStructureDestinationActionContextCurrentBaseBehaviors": 1,
    "pdfOutlineNameTreeActionStructureCurrentBase": "Name-tree destination action rows carry collapsed outline structure, style/color metadata, chained action review state, and target StructTree context without executing actions or promoting hidden operands to visible text.",
    "pdfOutlineNameTreeActionStructureCurrentBaseBehaviors": 1,
    "mappedPdfOutlineNameTreeActionStructureCurrentBaseBehaviors": 1,
    "pdfAcroFormWidgetActionResourceAppearanceCurrentBase": "Selected AcroForm widget appearance resource XObject action dictionaries are exposed as review-only metadata while field /V remains authoritative and JavaScript/URI/Hide action payloads stay non-executing.",
    "pdfAcroFormWidgetActionResourceAppearanceCurrentBaseBehaviors": 1,
    "mappedPdfAcroFormWidgetActionResourceAppearanceCurrentBaseBehaviors": 1,
    "pdfXrefObjectStreamPrevFreeCurrentBase": "Current xref-stream type-2 object-stream member rows keep the direct current /ObjStm carrier when a stale previous /Prev row marks that carrier free, preserving current page text before WordPress import.",
    "pdfXrefObjectStreamPrevFreeCurrentBaseBehaviors": 1,
    "mappedPdfXrefObjectStreamPrevFreeCurrentBaseBehaviors": 1,
    "pdfImageIccSoftMaskDecodeTransparencyCurrentBase": "ICCBased image stream samples and current-object grayscale SMask streams are decoded into bounded review rows before RGB preview planning, preserving Decode ranges and soft-mask alpha without raster execution.",
    "pdfImageIccSoftMaskDecodeTransparencyCurrentBaseBehaviors": 1,
    "tableOcrRowspanCaptionAccessibilityCurrentBase": "Forced-OCR rowspanned table headers bind preserved markerPDF Caption blocks to accessible WordPress table ids and headers attributes while keeping stale pdftext table lines excluded.",
    "tableOcrRowspanCaptionAccessibilityCurrentBaseBehaviors": 1,
    "mappedTableOcrRowspanCaptionAccessibilityCurrentBaseBehaviors": 1,
    "pdfParserNameTreeStreamFilterOwnerCurrentBase": "Catalog name-tree JavaScript payload streams keep current xref-selected stream-filter owners before WordPress import; stale detached streams remain review-only.",
    "pdfParserNameTreeStreamFilterOwnerCurrentBaseBehaviors": 1,
    "mappedPdfParserNameTreeStreamFilterOwnerCurrentBaseBehaviors": 1,
    "pdfPageAnnotationStructParentAssociatedActionCurrentBase": "Current page annotation action rows carry StructParent, StructElem, and associated FileSpec context before WordPress rendering without executing actions.",
    "pdfPageAnnotationStructParentAssociatedActionCurrentBaseBehaviors": 1,
    "mappedPdfPageAnnotationStructParentAssociatedActionCurrentBaseBehaviors": 1,
    "pdfSecurityDssSignatureReferenceTransformCurrentBase": "Matched DSS VRI rows carry signature Reference DocMDP FieldMDP and UR3 transform summaries for review without validation or signing.",
    "pdfSecurityDssSignatureReferenceTransformCurrentBaseBehaviors": 1,
    "mappedPdfSecurityDssSignatureReferenceTransformCurrentBaseBehaviors": 1,
    "pdfMetadataXmpOutputIntentNameTreeCurrentBase": "Catalog /Names value dictionaries summarize nested /Metadata XMP and /OutputIntents without promotion to document roots or action execution.",
    "pdfMetadataXmpOutputIntentNameTreeCurrentBaseBehaviors": 1,
    "mappedPdfMetadataXmpOutputIntentNameTreeCurrentBaseBehaviors": 1,
    "pdfParserObjectStreamFilterDictGenerationCurrentBase": "Object-stream /Filter operands use current xref-selected generations but dictionary-shaped filters are rejected before WordPress import.",
    "pdfParserObjectStreamFilterDictGenerationCurrentBaseBehaviors": 1,
    "mappedPdfParserObjectStreamFilterDictGenerationCurrentBaseBehaviors": 1,
    "pdfAcroFormSubmitResetAppearanceLockCurrentBase": "AcroForm SubmitForm/ResetForm field selections, widget /AP state, and signed /Lock scope are imported as review metadata only.",
    "pdfAcroFormSubmitResetAppearanceLockCurrentBaseBehaviors": 1,
    "mappedPdfAcroFormSubmitResetAppearanceLockCurrentBaseBehaviors": 1,
    "pdfOutlineActionNameTreePageReviewCurrentBase": "Outline GoTo actions whose destination resolves through /Names /Dests to a Thread action expose target-page review metadata without creating visible TOC rows.",
    "pdfOutlineActionNameTreePageReviewCurrentBaseBehaviors": 1,
    "mappedPdfOutlineActionNameTreePageReviewCurrentBaseBehaviors": 1,
    "pdfXrefStreamObjectOwnerFreeEntryCurrentBase": "Current xref-stream type-0 free rows own object numbers before stale direct scans and previous object-stream members.",
    "pdfXrefStreamObjectOwnerFreeEntryCurrentBaseBehaviors": 1,
    "mappedPdfXrefStreamObjectOwnerFreeEntryCurrentBaseBehaviors": 1,
    "pdfImageJpxSmaskColorSpacePdfaCurrentBase": "JPXDecode image review preserves embedded/external soft-mask metadata and PDF/A OutputIntent color context without raster execution.",
    "pdfImageJpxSmaskColorSpacePdfaCurrentBaseBehaviors": 1,
    "mappedPdfImageJpxSmaskColorSpacePdfaCurrentBaseBehaviors": 1,
    "pdfParserTrailerXrefNameCommentCurrentBase": "Xref-table trailer parsing ignores comment-only trailer dictionaries and decodes escaped trailer names before WordPress text extraction.",
    "pdfParserTrailerXrefNameCommentCurrentBaseBehaviors": 1,
    "mappedPdfParserTrailerXrefNameCommentCurrentBaseBehaviors": 1,
    "pdfPageStructTreeAnnotationPieceInfoCurrentBase": "Page PieceInfo composes with page StructParents and annotation StructParent OBJR rows before WordPress import review.",
    "pdfPageStructTreeAnnotationPieceInfoCurrentBaseBehaviors": 1,
    "mappedPdfPageStructTreeAnnotationPieceInfoCurrentBaseBehaviors": 1,
    "ocrLanguageConfidenceTriageCurrentBase": "OCR recognition preserves language and confidence review metadata while page replacement remains governed by upstream text-quality triage.",
    "ocrLanguageConfidenceTriageCurrentBaseBehaviors": 1,
    "mappedOcrLanguageConfidenceTriageCurrentBaseBehaviors": 1,
    "outputMarkdownArtifactSanitizeImagesCurrentBase": "Output artifact saving sanitizes image filenames, deduplicates collisions, and rewrites markdown metadata references before WordPress import.",
    "outputMarkdownArtifactSanitizeImagesCurrentBaseBehaviors": 1,
    "mappedOutputMarkdownArtifactSanitizeImagesCurrentBaseBehaviors": 1,
    "runtimeBenchmarkArtifactErrorJsonCurrentBase": "Benchmark report write failures persist review-only error JSON artifacts without executing Python, CUDA, models, Nougat, or external PDF tools.",
    "runtimeBenchmarkArtifactErrorJsonCurrentBaseBehaviors": 1,
    "mappedRuntimeBenchmarkArtifactErrorJsonCurrentBaseBehaviors": 1,
    "pdfFontCidCMapWidthsVerticalWritingCurrentBase": "Vertical writing-mode Type0 CMap CID widths preserve Td movement and word gaps before WordPress paragraph extraction.",
    "pdfFontCidCMapWidthsVerticalWritingCurrentBaseBehaviors": 1,
    "mappedPdfFontCidCMapWidthsVerticalWritingCurrentBaseBehaviors": 1,
    "layoutRotatedReadingOrderBlockBboxCurrentBase": "Rotated PDF page-space block bboxes are mapped before reading-order matching so WordPress imports preserve source order across rotated page geometry.",
    "layoutRotatedReadingOrderBlockBboxCurrentBaseBehaviors": 1,
    "mappedLayoutRotatedReadingOrderBlockBboxCurrentBaseBehaviors": 1,
    "pdfMetadataTrailerIdLangViewerPreferenceCurrentBase": "Current xref-selected trailer ID fingerprints, catalog Lang, ViewerPreferences, and page transition/action review metadata are preserved before WordPress import while stale trailer/XMP/action operands remain hidden from visible text.",
    "pdfMetadataTrailerIdLangViewerPreferenceCurrentBaseBehaviors": 1,
    "mappedPdfMetadataTrailerIdLangViewerPreferenceCurrentBaseBehaviors": 1,
    "pdfXrefLinearizedPrevHintStartxrefCurrentBase": "Linearized PDF hint-table byte ranges and latest startxref /Prev chains preserve current root/info/id ownership before stale body objects or hint payloads can affect WordPress text extraction.",
    "pdfXrefLinearizedPrevHintStartxrefCurrentBaseBehaviors": 1,
    "mappedPdfXrefLinearizedPrevHintStartxrefCurrentBaseBehaviors": 1,
    "pdfRichMediaScreenSelectorRenditionCurrentBase": "RichMedia and Screen annotation Rendition selector dictionaries expose play/pause/stop policy, target annotation, and chained action review metadata without executing media or leaking JavaScript/media payload text.",
    "pdfRichMediaScreenSelectorRenditionCurrentBaseBehaviors": 1,
    "mappedPdfRichMediaScreenSelectorRenditionCurrentBaseBehaviors": 1,
    "pdfAnnotationActionReferenceCoverageCurrentBase": "Generic page annotation action review exposes embedded GoTo, article Thread, optional-content state, transition, Movie, Sound, Rendition, and RichMediaExecute references as inert metadata without executing actions or leaking operands to visible text.",
    "pdfAnnotationActionReferenceCoverageCurrentBaseBehaviors": 2,
    "mappedPdfAnnotationActionReferenceCoverageCurrentBaseBehaviors": 2,
    "suppliedImageOnlyOcrHandoffCurrentBase": "Supplied image-only PDF pages with no extracted text blocks return empty visible Markdown plus markerpdf.image_only_pdf_ocr_handoff.v1 metadata, selected rendered-image summaries, OCR-required reasons, and adapter hooks without running OCR/models or exposing raster payload bytes.",
    "suppliedImageOnlyOcrHandoffCurrentBaseBehaviors": 1,
    "mappedSuppliedImageOnlyOcrHandoffCurrentBaseBehaviors": 1,
    "pdfImageDeviceNSeparationSmaskDecodeCurrentBase": "DeviceN and Separation image alternate color spaces preserve tint names, Decode bounds, and soft-mask alpha review metadata before RGB preview planning without raster execution.",
    "pdfImageDeviceNSeparationSmaskDecodeCurrentBaseBehaviors": 1,
    "mappedPdfImageDeviceNSeparationSmaskDecodeCurrentBaseBehaviors": 1,
    "pdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBase": "PDF Portfolio and associated FileSpec PieceInfo private streams expose declared size/checksum/date and computed checksum match-state review metadata while attachment payloads and private metadata stay out of document roots and visible text.",
    "pdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBaseBehaviors": 1,
    "mappedPdfMetadataPortfolioAssociatedPieceInfoChecksumCurrentBaseBehaviors": 1,
    "tableRotatedHeaderAccessibilityGridCurrentBase": "Forced-OCR rotated table headers preserve column/row header axes, header ids, and WordPress headers attributes before table rendering, including rotated header cells and rowspans.",
    "tableRotatedHeaderAccessibilityGridCurrentBaseBehaviors": 1,
    "mappedTableRotatedHeaderAccessibilityGridCurrentBaseBehaviors": 1,
    "pdfFontType3ColorGlyphResourceWidthCurrentBase": "Type3 color glyph CharProc resources contribute glyph width and text grouping metadata while resource drawing payload text remains excluded from WordPress paragraphs.",
    "pdfFontType3ColorGlyphResourceWidthCurrentBaseBehaviors": 1,
    "mappedPdfFontType3ColorGlyphResourceWidthCurrentBaseBehaviors": 1,
    "layoutPageHeaderFooterRotatedColumnsCurrentBase": "Rotated page header/footer and multi-column block bboxes are transformed before layout ordering so visible WordPress paragraphs follow source order across rotated page geometry.",
    "layoutPageHeaderFooterRotatedColumnsCurrentBaseBehaviors": 1,
    "mappedLayoutPageHeaderFooterRotatedColumnsCurrentBaseBehaviors": 1,
    "pdfParserObjectStreamGenerationOffsetOwnerCurrentBase": "Object-stream member ownership honors current xref-selected generations and offset owners while commented or stale object-stream header numbers remain ignored before WordPress text extraction.",
    "pdfParserObjectStreamGenerationOffsetOwnerCurrentBaseBehaviors": 1,
    "mappedPdfParserObjectStreamGenerationOffsetOwnerCurrentBaseBehaviors": 1,
    "pdfMetadataXmpLangMarkInfoCatalogCurrentBase": "Current xref-selected XMP language, catalog MarkInfo, and catalog review metadata are exposed without promoting XMP packet text or review-only fields into visible WordPress paragraphs.",
    "pdfMetadataXmpLangMarkInfoCatalogCurrentBaseBehaviors": 1,
    "mappedPdfMetadataXmpLangMarkInfoCatalogCurrentBaseBehaviors": 1,
    "pdfOutlineDirectNamedThreadActionCurrentBase": "Direct named destinations and chained Thread actions carry target page/thread context onto outline review rows without executing actions or leaking action operands into visible content.",
    "pdfOutlineDirectNamedThreadActionCurrentBaseBehaviors": 1,
    "mappedPdfOutlineDirectNamedThreadActionCurrentBaseBehaviors": 1,
    "pdfXrefObjectStreamPrevGenerationRebuildCurrentBase": "Current object-stream carriers are rebuilt before stale previous-generation direct carriers or Prev rows can override selected page objects during WordPress text extraction.",
    "pdfXrefObjectStreamPrevGenerationRebuildCurrentBaseBehaviors": 1,
    "mappedPdfXrefObjectStreamPrevGenerationRebuildCurrentBaseBehaviors": 1,
    "pdfPageAssociatedFilesMarkedContentAltCurrentBase": "Page associated FileSpec provenance is attached to marked-content Alt review rows while embedded payloads and attachment metadata remain review-only.",
    "pdfPageAssociatedFilesMarkedContentAltCurrentBaseBehaviors": 1,
    "mappedPdfPageAssociatedFilesMarkedContentAltCurrentBaseBehaviors": 1,
    "markerRuntimeServerUploadPaginationErrorBoundaryCurrentBase": "Marker server upload planning preserves pagination fields, remote error payload boundaries, and temporary-file cleanup behavior without live HTTP, Uvicorn, Python models, or external services.",
    "markerRuntimeServerUploadPaginationErrorBoundaryCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeServerUploadPaginationErrorBoundaryCurrentBaseBehaviors": 1,
    "markerRuntimePreviewArtifactBoundaryCurrentBase": "Saved output artifacts expose runtime-only preview HTML and artifact metadata while image payload bytes remain stored artifacts instead of visible WordPress content.",
    "markerRuntimePreviewArtifactBoundaryCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimePreviewArtifactBoundaryCurrentBaseBehaviors": 1,
    "pdfAttachmentFileSpecAFRelationshipChecksumCurrentBase": "Current xref-selected associated FileSpec AFRelationship and checksum metadata are exposed as review rows while embedded payload bytes and private streams stay out of visible WordPress content.",
    "pdfAttachmentFileSpecAFRelationshipChecksumCurrentBaseBehaviors": 1,
    "mappedPdfAttachmentFileSpecAFRelationshipChecksumCurrentBaseBehaviors": 1,
    "pdfAttachmentPageAssociatedFilePreflightCurrentBase": "Lightweight attachment preflight summarizes page-level /AF associated FileSpec rows, carries page number/object/index metadata, and marks EmbeddedFiles mirrors without exposing attachment payload bytes.",
    "pdfAttachmentPageAssociatedFilePreflightCurrentBaseBehaviors": 1,
    "mappedPdfAttachmentPageAssociatedFilePreflightCurrentBaseBehaviors": 1,
    "pdfFontSimpleType3CMapSpacingCurrentBase": "Type3 Encoding CMap CID 32 is honored as source word spacing before WordPress paragraph grouping, preserving text spacing while excluding glyph resource payloads.",
    "pdfFontSimpleType3CMapSpacingCurrentBaseBehaviors": 1,
    "mappedPdfFontSimpleType3CMapSpacingCurrentBaseBehaviors": 1,
    "tableOcrStructureAssignmentRegressionCurrentBase": "Forced-OCR table structure assignment preserves supplied OCR text by geometry even when recognition returns reordered structure cells before WordPress table rendering.",
    "tableOcrStructureAssignmentRegressionCurrentBaseBehaviors": 1,
    "mappedTableOcrStructureAssignmentRegressionCurrentBaseBehaviors": 1,
    "markerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBase": "Marker server upload errors roundtrip through benchmark output artifact JSON with review-only payload boundaries and no live server/model execution.",
    "markerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeServerBenchmarkOutputErrorRoundtripCurrentBaseBehaviors": 1,
    "pdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBase": "Page-only signature widgets are attached to XFA action review metadata without executing signature, form, appearance, or JavaScript actions.",
    "pdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfAcroFormSignatureXfaWidgetActionBoundaryCurrentBaseBehaviors": 1,
    "pdfOutlineNamedDestinationTransitionThreadSecurityCurrentBase": "Outline named-destination action chains surface security preflight rows with target page transition and article-thread context while hiding action operands and signature bytes from visible text.",
    "pdfOutlineNamedDestinationTransitionThreadSecurityCurrentBaseBehaviors": 1,
    "mappedPdfOutlineNamedDestinationTransitionThreadSecurityCurrentBaseBehaviors": 1,
    "pdfPageStructTreeMarkedContentAssociatedFilesCurrentBase": "ParentTree StructElem associated files propagate onto marked-content tagged rows while embedded payloads remain review-only.",
    "pdfPageStructTreeMarkedContentAssociatedFilesCurrentBaseBehaviors": 1,
    "mappedPdfPageStructTreeMarkedContentAssociatedFilesCurrentBaseBehaviors": 1,
    "pdfParserSecurityXrefFilterErrorBoundaryCurrentBase": "Current startxref xref-stream filter decoding failures fail closed before stale xref-table fallback can expose hidden or outdated WordPress text.",
    "pdfParserSecurityXrefFilterErrorBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfParserSecurityXrefFilterErrorBoundaryCurrentBaseBehaviors": 1,
    "pdfXrefHybridLinearizedObjectStreamGenerationCurrentBase": "Hybrid linearized xref repair preserves repaired direct generation pages when hint ranges cover stale compressed members before WordPress extraction.",
    "pdfXrefHybridLinearizedObjectStreamGenerationCurrentBaseBehaviors": 1,
    "mappedPdfXrefHybridLinearizedObjectStreamGenerationCurrentBaseBehaviors": 1,
    "pdfMetadataPdfaCatalogAssociatedOutlineCurrentBase": "Current catalog associated FileSpec /AF entries compose PDF/A OutputIntent provenance, attachment-local PieceInfo/checksum metadata, outline destination context, and XMP/Info fallback boundaries while embedded payloads, private XMP, ICC profile bytes, and action targets remain review-only.",
    "pdfMetadataPdfaCatalogAssociatedOutlineCurrentBaseBehaviors": 1,
    "mappedPdfMetadataPdfaCatalogAssociatedOutlineCurrentBaseBehaviors": 1,
    "pdfImageInlineJpxColorKeyOutputPreviewCurrentBase": "Inline JPX ColorKey output-preview rows are derived from supplied decoded JPEG2000 samples while raw JPX payload bytes remain review-only and excluded from WordPress text.",
    "pdfImageInlineJpxColorKeyOutputPreviewCurrentBaseBehaviors": 1,
    "mappedPdfImageInlineJpxColorKeyOutputPreviewCurrentBaseBehaviors": 1,
    "tableOcrHeaderGridCaptionCellspanCurrentBase": "Forced-OCR table header-grid captions bind caption and section accessibility metadata to explicit cellspan occupancy, covered cells, header ids, and data-cell headers before Markdown drops span cells.",
    "tableOcrHeaderGridCaptionCellspanCurrentBaseBehaviors": 1,
    "mappedTableOcrHeaderGridCaptionCellspanCurrentBaseBehaviors": 1,
    "pdfFontCidType3ToUnicodeSpacingWidthCurrentBase": "Type0 CID and Type3 CMap fonts count Encoding CMap CID 32 as source word spacing before ToUnicode text grouping, preserving WordPress paragraphs with current font widths and nonliteral source space codes.",
    "pdfFontCidType3ToUnicodeSpacingWidthCurrentBaseBehaviors": 1,
    "mappedPdfFontCidType3ToUnicodeSpacingWidthCurrentBaseBehaviors": 1,
    "pdfFontType0Type3BroaderBehaviorCurrentBase": "Type0 direct-referenced DescendantFonts dictionaries and Type3 CMap CharProc d0/d1 widths now feed native text advance grouping before WordPress extraction without Python/pdftext execution.",
    "pdfFontType0Type3BroaderBehaviorCurrentBaseBehaviors": 2,
    "mappedPdfFontType0Type3BroaderBehaviorCurrentBaseBehaviors": 2,
    "pdfParserStreamDictionaryXrefOwnerRecoveryCurrentBase": "Xref-stream dictionaries recover exact-generation Length, Filter, and DecodeParms helper owners before stale same-number objects can redirect current-base WordPress text extraction.",
    "pdfParserStreamDictionaryXrefOwnerRecoveryCurrentBaseBehaviors": 1,
    "mappedPdfParserStreamDictionaryXrefOwnerRecoveryCurrentBaseBehaviors": 1,
    "markerRuntimeConvertServerOutputPaginationBoundaryCurrentBase": "Marker server completed conversion responses preserve upstream paginated Markdown page markers as review metadata and split page segments without live HTTP, FastAPI, Uvicorn, Python models, or external services.",
    "markerRuntimeConvertServerOutputPaginationBoundaryCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeConvertServerOutputPaginationBoundaryCurrentBaseBehaviors": 1,
    "pdfOutlineDestinationActionPageLabelStructureCurrentBase": "Outline destination action review rows expose compact target PageLabel, page-number, MCID, raw-role, role, and tagged-text summaries for WordPress navigation review while URI, JavaScript, destination names, and outline titles remain review-only.",
    "pdfOutlineDestinationActionPageLabelStructureCurrentBaseBehaviors": 1,
    "mappedPdfOutlineDestinationActionPageLabelStructureCurrentBaseBehaviors": 1,
    "pdfPageAnnotationStructTreeAssociatedTransitionCurrentBase": "Current page annotation local-destination action rows carry target page labels, transitions, page actions, StructParent, and associated FileSpec context before WordPress rendering without executing actions.",
    "pdfPageAnnotationStructTreeAssociatedTransitionCurrentBaseBehaviors": 1,
    "mappedPdfPageAnnotationStructTreeAssociatedTransitionCurrentBaseBehaviors": 1,
    "pdfXrefStreamPrevHybridGenerationRecoveryCurrentBase": "Current xref-stream /Prev chains recover previous hybrid object-stream members when a malformed generation-noise carrier row selects no direct carrier object, preserving WordPress-visible current/recovered page text and excluding stale carrier/member dictionary text.",
    "pdfXrefStreamPrevHybridGenerationRecoveryCurrentBaseBehaviors": 1,
    "mappedPdfXrefStreamPrevHybridGenerationRecoveryCurrentBaseBehaviors": 1,
    "pdfImageRenderingColorSpaceSoftMaskTransferBundleCurrentBase": "Named DeviceN/Separation image rendering resolves current ColorSpace resources, preserves current soft-mask stream and transfer-function boundaries, records preview-only raster filters, and summarizes RGB/alpha review metadata without executing pypdfium, PIL, Python models, or external PDF tools.",
    "pdfImageRenderingColorSpaceSoftMaskTransferBundleCurrentBaseBehaviors": 1,
    "mappedPdfImageRenderingColorSpaceSoftMaskTransferBundleCurrentBaseBehaviors": 1,
    "pdfLayoutPageAnnotationStructTreeTableBundleCurrentBase": "Supplied table formatting carries page review metadata into table-context rows, linking page StructTree marked content, page PieceInfo, and overlapping annotation StructParent associated files while excluding unrelated annotation/review payload text from visible WordPress output.",
    "pdfLayoutPageAnnotationStructTreeTableBundleCurrentBaseBehaviors": 1,
    "mappedPdfLayoutPageAnnotationStructTreeTableBundleCurrentBaseBehaviors": 1,
    "markerRuntimeServerConvertBenchmarkOutputBundleCurrentBase": "Marker server completed conversion responses are represented as benchmark output artifacts, preserving pagination, error JSON, artifact hashes, and review-only boundaries without live FastAPI/Uvicorn, Python models, or external services.",
    "markerRuntimeServerConvertBenchmarkOutputBundleCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeServerConvertBenchmarkOutputBundleCurrentBaseBehaviors": 1,
    "pdfMetadataCatalogOutlineAssociatedSecurityBundleCurrentBase": "Catalog associated files, PDF/A output intent provenance, outline target context, and security action review rows compose as review-only metadata while embedded payloads and action targets stay out of visible WordPress text.",
    "pdfMetadataCatalogOutlineAssociatedSecurityBundleCurrentBaseBehaviors": 1,
    "mappedPdfMetadataCatalogOutlineAssociatedSecurityBundleCurrentBaseBehaviors": 1,
    "pdfPageAnnotationStructTreeLayoutPreviewBundleCurrentBase": "Page annotation StructParent context, StructTree marked content, page PieceInfo, and supplied layout blocks are bundled into marker app preview metadata without rasterizing through pypdfium/PIL or exposing annotation payload text.",
    "pdfPageAnnotationStructTreeLayoutPreviewBundleCurrentBaseBehaviors": 1,
    "mappedPdfPageAnnotationStructTreeLayoutPreviewBundleCurrentBaseBehaviors": 1,
    "tableOcrSpanGridBenchmarkFormatBundleCurrentBase": "OCR span-grid, row/column-span, caption, and benchmark score provenance are bundled into verifier-compatible table benchmark rows without rerunning Python/model table recognition.",
    "tableOcrSpanGridBenchmarkFormatBundleCurrentBaseBehaviors": 1,
    "mappedTableOcrSpanGridBenchmarkFormatBundleCurrentBaseBehaviors": 1,
    "pdfFontCMapCidType3WidthSpacingBundleCurrentBase": "Type3 Encoding CMap CIDs are used for source-space word spacing before ToUnicode/text-advance grouping, preserving quote-operator paragraph spacing at current base.",
    "pdfFontCMapCidType3WidthSpacingBundleCurrentBaseBehaviors": 1,
    "mappedPdfFontCMapCidType3WidthSpacingBundleCurrentBaseBehaviors": 1,
    "pdfXrefObjectStreamPrevFreeGenerationBundleCurrentBase": "Current free-generation xref rows suppress stale Prev object-stream members while still rebuilding the current carrier needed for WordPress-visible page text.",
    "pdfXrefObjectStreamPrevFreeGenerationBundleCurrentBaseBehaviors": 1,
    "mappedPdfXrefObjectStreamPrevFreeGenerationBundleCurrentBaseBehaviors": 1,
    "outputArtifactPreviewMarkdownImageBundleCurrentBase": "Saved Markdown image artifacts are bundled with sanitized filenames, optional title metadata, preview-target accounting, rewritten Markdown references, and runtime preview boundaries without exposing raw image payload bytes.",
    "outputArtifactPreviewMarkdownImageBundleCurrentBaseBehaviors": 1,
    "mappedOutputArtifactPreviewMarkdownImageBundleCurrentBaseBehaviors": 1,
    "pdfAcroFormSignatureXfaWidgetActionBundleCurrentBase": "Mixed signature field widgets retain page annotation order, inherited AcroForm resources, XFA packet review rows, signature locks, widget actions, and current values as review-only metadata without executing form or PDF actions.",
    "pdfAcroFormSignatureXfaWidgetActionBundleCurrentBaseBehaviors": 1,
    "mappedPdfAcroFormSignatureXfaWidgetActionBundleCurrentBaseBehaviors": 1,
    "pdfPageStructTreeThreadAnnotationActionCurrentBase": "Page review rows compose StructTree marked content, article-thread beads, annotation StructParents, reply threads, and page action context while excluding action operands and detached annotation text from visible WordPress paragraphs.",
    "pdfPageStructTreeThreadAnnotationActionCurrentBaseBehaviors": 1,
    "mappedPdfPageStructTreeThreadAnnotationActionCurrentBaseBehaviors": 1,
    "layoutTableOcrPageReviewSectionOrderCurrentBase": "Layout-ordered OCR table conversion preserves section/caption order, table Markdown, and page-review metadata boundaries before WordPress output without model execution.",
    "layoutTableOcrPageReviewSectionOrderCurrentBaseBehaviors": 1,
    "mappedLayoutTableOcrPageReviewSectionOrderCurrentBaseBehaviors": 1,
    "markerRuntimeConvertServerBenchmarkArtifactUploadCurrentBase": "Marker server upload conversion outputs are represented as benchmark artifact JSON with hashes, pagination, and error boundaries without live HTTP/FastAPI/Python model workers.",
    "markerRuntimeConvertServerBenchmarkArtifactUploadCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeConvertServerBenchmarkArtifactUploadCurrentBaseBehaviors": 1,
    "pdfMetadataAssociatedFileOutputIntentEncryptXmpCurrentBase": "Encrypted PDFs preserve unencrypted root XMP review metadata while blocking encrypted associated FileSpec metadata, embedded payloads, and OutputIntent rows from visible content.",
    "pdfMetadataAssociatedFileOutputIntentEncryptXmpCurrentBaseBehaviors": 1,
    "mappedPdfMetadataAssociatedFileOutputIntentEncryptXmpCurrentBaseBehaviors": 1,
    "pdfParserCMapFilterOwnerStreamLengthCurrentBase": "Filtered ToUnicode CMap streams resolve current Length and Filter owners before text extraction, failing closed for stale or encrypted owner ambiguity.",
    "pdfParserCMapFilterOwnerStreamLengthCurrentBaseBehaviors": 1,
    "mappedPdfParserCMapFilterOwnerStreamLengthCurrentBaseBehaviors": 1,
    "pdfSecurityAcroFormDssActionAttachmentBundleCurrentBase": "AcroForm action FileSpec targets are aggregated into DSS/signature permission review metadata, including related embedded-file hashes and review-only action safety flags, while action payloads stay out of visible text.",
    "pdfSecurityAcroFormDssActionAttachmentBundleCurrentBaseBehaviors": 1,
    "mappedPdfSecurityAcroFormDssActionAttachmentBundleCurrentBaseBehaviors": 1,
    "pdfParserXrefStreamCompressedOperandOwnerCurrentBase": "Xref-stream indirect Filter and DecodeParms operands can resolve from current compressed object-stream helper entries before stale same-number direct helpers can own current-base extraction.",
    "pdfParserXrefStreamCompressedOperandOwnerCurrentBaseBehaviors": 1,
    "mappedPdfParserXrefStreamCompressedOperandOwnerCurrentBaseBehaviors": 1,
    "pdfImageColorSpaceMaskInlineOutputPreviewCurrentBase": "Inline image output preview rows expand Indexed/RGB samples with Decode arrays, ColorKey masks, supplied JPX samples, and current soft-mask alpha without native raster execution or visible payload leakage.",
    "pdfImageColorSpaceMaskInlineOutputPreviewCurrentBaseBehaviors": 1,
    "mappedPdfImageColorSpaceMaskInlineOutputPreviewCurrentBaseBehaviors": 1,
    "pdfAcroFormWidgetXfaActionAppearanceValueCurrentBase": "AcroForm widget review preserves field /V and /DV authority while correlating XFA references, selected appearance streams, and field/widget actions without importing XFA values or executing actions.",
    "pdfAcroFormWidgetXfaActionAppearanceValueCurrentBaseBehaviors": 1,
    "mappedPdfAcroFormWidgetXfaActionAppearanceValueCurrentBaseBehaviors": 1,
    "pdfFontCidWidthResourceSpacingCurrentBase": "CIDFont width metrics, source CMap word spacing, and text-state horizontal scale are applied to native styled-span bboxes without falling back to raw decoded-text advance.",
    "pdfFontCidWidthResourceSpacingCurrentBaseBehaviors": 1,
    "mappedPdfFontCidWidthResourceSpacingCurrentBaseBehaviors": 1,
    "markerRuntimeServerUploadBenchmarkErrorCurrentBase": "Failed marker server uploads become review-only benchmark error artifacts while successful uploads continue through sanitized benchmark output bundles without live FastAPI/Uvicorn execution.",
    "markerRuntimeServerUploadBenchmarkErrorCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeServerUploadBenchmarkErrorCurrentBaseBehaviors": 1,
    "outputMarkdownImageArtifactQualityCurrentBase": "Saved Markdown image artifacts are audited for PNG signature, IHDR dimensions, IEND, CRC, WordPress media importability, and warning counts without changing saved Markdown/runtime preview contracts.",
    "outputMarkdownImageArtifactQualityCurrentBaseBehaviors": 1,
    "mappedOutputMarkdownImageArtifactQualityCurrentBaseBehaviors": 1,
    "pdfXrefObjectStreamHybridGenerationOwnerCurrentBase": "Hybrid xref table carrier generations own object-stream expansion before companion xref-stream type-2 member rows, keeping stale generation-zero payloads out of visible text.",
    "pdfXrefObjectStreamHybridGenerationOwnerCurrentBaseBehaviors": 1,
    "mappedPdfXrefObjectStreamHybridGenerationOwnerCurrentBaseBehaviors": 1,
    "tableOcrBenchmarkSpanGridQualityCurrentBase": "OCR table benchmark rows expose span-grid quality gates for complete grids, contiguous spans, resolved covered cells, orphan cells, and malformed/missing grids before benchmark trust.",
    "tableOcrBenchmarkSpanGridQualityCurrentBaseBehaviors": 1,
    "mappedTableOcrBenchmarkSpanGridQualityCurrentBaseBehaviors": 1,
    "pdfOutlineDestinationThreadActionMetadataCurrentBase": "Outline destinations that resolve to Thread action dictionaries propagate target page, bead, transition, page-review, and chained action metadata without executing actions or leaking operands.",
    "pdfOutlineDestinationThreadActionMetadataCurrentBaseBehaviors": 1,
    "mappedPdfOutlineDestinationThreadActionMetadataCurrentBaseBehaviors": 1,
    "tableOcrRotatedHeaderCaptionCurrentBase": "Supplied OCR rotated table headers and captions are bound to physical-axis cellspan review metadata while stale table text is replaced before WordPress rendering.",
    "tableOcrRotatedHeaderCaptionCurrentBaseBehaviors": 1,
    "mappedTableOcrRotatedHeaderCaptionCurrentBaseBehaviors": 1,
    "pdfXrefStreamPrevIndexWidthRepairCurrentBase": "Malformed /Prev xref-stream Index rows with zero-width W generation fields are repaired by current object offsets without selecting stale page objects.",
    "pdfXrefStreamPrevIndexWidthRepairCurrentBaseBehaviors": 1,
    "mappedPdfXrefStreamPrevIndexWidthRepairCurrentBaseBehaviors": 1,
    "pdfSecurityPermissionDssActionChainCurrentBase": "DSS validation material and signature permission transforms are summarized for post-signature action chains without granting action execution or rights enforcement.",
    "pdfSecurityPermissionDssActionChainCurrentBaseBehaviors": 1,
    "mappedPdfSecurityPermissionDssActionChainCurrentBaseBehaviors": 1,
    "markerRuntimeConvertServerUploadPaginationCurrentBase": "Marker server upload pagination review records upload cleanup, form fields, remote trace, and page segments while excluding uploaded PDF bytes and live server execution.",
    "markerRuntimeConvertServerUploadPaginationCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeConvertServerUploadPaginationCurrentBaseBehaviors": 1,
    "pdfFontType0CidSetVerticalSpacingCurrentBase": "Predefined UCS2 vertical Type0 CMaps use two-byte source widths and vertical writing mode before CIDSet/default spacing decisions.",
    "pdfFontType0CidSetVerticalSpacingCurrentBaseBehaviors": 1,
    "mappedPdfFontType0CidSetVerticalSpacingCurrentBaseBehaviors": 1,
    "pdfAcroFormWidgetAppearanceExportCurrentBase": "AcroForm button widget appearance states resolve effective export values from /Opt without executing submit actions or rendering appearances.",
    "pdfAcroFormWidgetAppearanceExportCurrentBaseBehaviors": 1,
    "mappedPdfAcroFormWidgetAppearanceExportCurrentBaseBehaviors": 1,
    "pdfMetadataXmpNameTreeAssociatedSchemaCurrentBase": "Current root XMP extension schema declarations correlate with EmbeddedFiles name-tree schema attachments and PDF/A output intent review metadata without exposing payload bytes.",
    "pdfMetadataXmpNameTreeAssociatedSchemaCurrentBaseBehaviors": 1,
    "mappedPdfMetadataXmpNameTreeAssociatedSchemaCurrentBaseBehaviors": 1,
    "pdfImageColorSpaceSmaskJpeg2000OutputCurrentBase": "Supplied JPEG2000 decoded samples map through current color-space resources, Decode arrays, external SMask alpha, and PDF/A review context into RGB/RGBA rows without native JPX raster execution.",
    "pdfImageColorSpaceSmaskJpeg2000OutputCurrentBaseBehaviors": 1,
    "mappedPdfImageColorSpaceSmaskJpeg2000OutputCurrentBaseBehaviors": 1,
    "pdfPageAnnotationAssociatedStructTreeReviewCurrentBase": "Page annotations without StructParent rows can associate through StructTree OBJR fallback rows while keeping review-only structure text and attachment names out of visible content.",
    "pdfPageAnnotationAssociatedStructTreeReviewCurrentBaseBehaviors": 1,
    "mappedPdfPageAnnotationAssociatedStructTreeReviewCurrentBaseBehaviors": 1,
    "pdfParserInlineStreamJpxCMapRepairCurrentBase": "Malformed inline JPX image boundaries fall back to delimiter-style EI repair so following CMap-decoded text remains visible while JPX payload bytes stay hidden.",
    "pdfParserInlineStreamJpxCMapRepairCurrentBaseBehaviors": 1,
    "mappedPdfParserInlineStreamJpxCMapRepairCurrentBaseBehaviors": 1,
    "pdfInlineImageTokenizerBoundaryCurrentBase": "Content-stream tokenization now recovers malformed standalone BI preambles that are not followed by image dictionary name/value pairs while preserving real BI/ID/EI inline image payload exclusion, including indirect inline-image values such as /SMask n n R.",
    "pdfInlineImageTokenizerBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfInlineImageTokenizerBoundaryCurrentBaseBehaviors": 1,
    "outputMarkdownTableImageArtifactCurrentBase": "Markdown table image references are accounted by table/cell coordinate, persisted artifact hash, missing target, runtime-preview data URI expectation, and unreferenced table image artifacts without exposing image payloads.",
    "outputMarkdownTableImageArtifactCurrentBaseBehaviors": 2,
    "tableGridGeometryBoundaryCurrentBase": "Supplied table row and column bands are clipped to the cropped table-image boundary before WordPress grid review metadata is emitted, with out-of-crop bands excluded while Markdown table text remains stable without Python/model execution.",
    "tableGridGeometryBoundaryCurrentBaseBehaviors": 1,
    "mappedTableGridGeometryBoundaryCurrentBaseBehaviors": 1,
    "pdfTextDictionaryKeepCharsBoundaryCurrentBase": "Supplied pdftext dictionary pages are sanitized like dictionary_output(..., keep_chars=false): block and line payloads are reduced to core bbox/children fields and raw per-character span payloads are dropped before WordPress import metadata keeps char offsets reviewable.",
    "pdfTextDictionaryKeepCharsBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfTextDictionaryKeepCharsBoundaryCurrentBaseBehaviors": 1,
    "pdfParserXrefStreamIndirectIndexWidthCurrentBase": "Xref-stream /W, /Index, and /Size operands resolve through current indirect helper objects before object-stream page selection, preventing stale fallback object-stream text from winning current-base extraction.",
    "pdfParserXrefStreamIndirectIndexWidthCurrentBaseBehaviors": 1,
    "mappedPdfParserXrefStreamIndirectIndexWidthCurrentBaseBehaviors": 1,
    "markerRuntimeSinglePreflightBoundaryCurrentBase": "Single-document convert_single.py admission records PYTORCH MPS fallback, option parsing, all-model load boundary, convert_single_pdf call shape, no batch resume/min_length gates, and save_markdown empty-output persistence without executing Python, Torch, model workers, Streamlit, FastAPI, or external PDF tools.",
    "markerRuntimeSinglePreflightBoundaryCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeSinglePreflightBoundaryCurrentBaseBehaviors": 1,
    "pdfFontWidthAdvanceBoundaryCurrentBase": "Simple-font missing glyph codes use the average of positive /Widths entries as their default advance when /MissingWidth is absent, quote-operator word/character spacing is applied before native styled-span bbox advance, and vertical Type0 CIDFont /W2 displacements drive styled-span bbox height, preventing false WordPress text gaps while preserving review geometry.",
    "pdfFontWidthAdvanceBoundaryCurrentBaseBehaviors": 3,
    "mappedPdfFontWidthAdvanceBoundaryCurrentBaseBehaviors": 3,
    "pdfTextDictionaryPostprocessCurrentBase": "Supplied pdftext dictionary_output span payloads normalize span text before WordPress block merging, preserving selected page ranges/options while trimming pdftext artifacts from paragraph output.",
    "pdfTextDictionaryPostprocessCurrentBaseBehaviors": 1,
    "mappedPdfTextDictionaryPostprocessCurrentBaseBehaviors": 1,
    "pdfTextDictionarySortBoundaryCurrentBase": "Supplied pdftext dictionary pages can opt into dictionary_output(sort=true) row/column block ordering before Marker page conversion, preserving the default unsorted markerPDF path while aligning sorted char_blocks and visible WordPress paragraphs.",
    "pdfTextDictionarySortBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfTextDictionarySortBoundaryCurrentBaseBehaviors": 1,
    "pdfTextDictionaryBlankPageBoundaryCurrentBase": "Selected pdftext dictionary pages with no text characters and blocks: [] remain present as blank Marker pages, preserve page range and page-number metadata, and emit no Gutenberg paragraph text while paginated review can still expose the page-start marker.",
    "pdfTextDictionaryBlankPageBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfTextDictionaryBlankPageBoundaryCurrentBaseBehaviors": 1,
    "pdfNamedDestinationExtractorCurrentBase": "Native named-destination extraction resolves catalog /Names /Dests trees and legacy /Dests dictionaries to page indices and Fit/XYZ coordinates while skipping malformed values, ignoring stale duplicate catalog/name-tree objects appended after the current %%EOF, and rejecting mismatched-generation name-tree kids, indirect names, destination dictionaries, page refs, and legacy /Dests rows.",
    "pdfNamedDestinationExtractorCurrentBaseBehaviors": 3,
    "mappedPdfNamedDestinationExtractorCurrentBaseBehaviors": 3,
    "markerAppPreviewPageLabelsCurrentBase": "MarkerAppPreview resolves catalog PageLabels number-tree sections into per-page labels for preview/page-inventory metadata, including roman, repeated-letter alphabetic, prefixed, prefix-only, indirect-Kids, indirect /S /P /St operands, and /Limits-bounded labels.",
    "markerAppPreviewPageLabelsCurrentBaseBehaviors": 5,
    "mappedMarkerAppPreviewPageLabelsCurrentBaseBehaviors": 5,
    "pdfXrefPrevIncrementalContentEvidenceCurrentBase": "Incremental xref /Prev chains keep the latest current content stream for reused objects before text extraction, preventing superseded draft stream text from winning WordPress imports.",
    "pdfXrefPrevIncrementalContentEvidenceCurrentBaseBehaviors": 1,
    "mappedPdfXrefPrevIncrementalContentEvidenceCurrentBaseBehaviors": 1,
    "pdfCcittFaxFilterBoundaryCurrentBase": "CCITTFaxDecode/CCF image-only streams are skipped before WordPress text extraction while filter metadata remains review-only, preventing compressed fax payload bytes from becoming visible paragraphs.",
    "pdfCcittFaxFilterBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfCcittFaxFilterBoundaryCurrentBaseBehaviors": 1,
    "pdfParserMalformedCMapFilterLiteralCurrentBase": "Malformed ToUnicode CMap filter operators and literal boundaries fall back to simple font encoding, preserving searchable WordPress text while ignoring corrupt CMap payloads.",
    "pdfParserMalformedCMapFilterLiteralCurrentBaseBehaviors": 1,
    "mappedPdfParserMalformedCMapFilterLiteralCurrentBaseBehaviors": 1,
    "pdfInlineDctDecodeFilterBoundaryCurrentBase": "Inline DCTDecode/JPEG image payloads are bounded through EOI/EI scanning before text-token parsing so JPEG bytes cannot leak into WordPress paragraphs.",
    "pdfInlineDctDecodeFilterBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfInlineDctDecodeFilterBoundaryCurrentBaseBehaviors": 1,
    "pdfXrefObjectStreamSkippedHeaderIndexCurrentBase": "Object-stream member indexes remain aligned when compressed-object headers contain skipped rows, keeping current xref-selected page objects authoritative before WordPress extraction.",
    "pdfXrefObjectStreamSkippedHeaderIndexCurrentBaseBehaviors": 1,
    "mappedPdfXrefObjectStreamSkippedHeaderIndexCurrentBaseBehaviors": 1,
    "pdfCMapDefaultWidthSourceFallbackCurrentBase": "Type0 CMap source-code widths provide default glyph segmentation when codespacerange rows are absent, preserving text advance and word boundaries before WordPress paragraphs.",
    "pdfCMapDefaultWidthSourceFallbackCurrentBaseBehaviors": 1,
    "mappedPdfCMapDefaultWidthSourceFallbackCurrentBaseBehaviors": 1,
    "pdfImageXObjectFormResourceBoundaryCurrentBase": "Image XObjects referenced through Form XObject resources stay image-only review metadata while form-scoped text resources remain extractable.",
    "pdfImageXObjectFormResourceBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfImageXObjectFormResourceBoundaryCurrentBaseBehaviors": 1,
    "pdfImageXObjectPlacementBoundaryCurrentBase": "Image XObject invocations preserve q/Q/cm graphics-state CTM placement, rectangular clipping-path painted bboxes, rotated unit-square page bboxes, and nested Form XObject /Matrix placement review metadata without raster execution or payload text leakage.",
    "pdfImageXObjectPlacementBoundaryCurrentBaseBehaviors": 2,
    "mappedPdfImageXObjectPlacementBoundaryCurrentBaseBehaviors": 2,
    "pdfEncryptedPermissionCryptFilterPreflightCurrentBase": "Encrypted Standard security-handler preflight records StmF, StrF, and EFF crypt-filter content-role status as identity, encrypted, or missing review metadata while keeping text import blocked until decryption is available.",
    "pdfEncryptedPermissionCryptFilterPreflightCurrentBaseBehaviors": 1,
    "mappedPdfEncryptedPermissionCryptFilterPreflightCurrentBaseBehaviors": 1,
    "pdfQpdfEncryptedPermissionFixtureCurrentBase": "QPDF-derived AES-256 revision-6 encrypted fixtures prove no-extract and copy-allowed Standard permission diagnostics, password-readiness review metadata, and encrypted converter short-circuit decisions without decrypting content, exposing password/key material, running qpdf at test time, or queuing models.",
    "pdfQpdfEncryptedPermissionFixtureCurrentBaseBehaviors": 1,
    "mappedPdfQpdfEncryptedPermissionFixtureCurrentBaseBehaviors": 1,
    "pdfOutlineStructureElementMetadataCurrentBase": "Outline extraction carries structure-element identifiers, roles, language, title, and alt/actual text review metadata without promoting review-only structure text into visible paragraphs.",
    "pdfOutlineStructureElementMetadataCurrentBaseBehaviors": 1,
    "mappedPdfOutlineStructureElementMetadataCurrentBaseBehaviors": 1,
    "pdfTaggedTableSectionOrderingCurrentBase": "Tagged PDF table subtrees normalize irregular THead, repeated TBody, TFoot, TR, and wrapper-node order before WordPress text/table rendering while preserving every cell MCID and section/row/cell diagnostics.",
    "pdfTaggedTableSectionOrderingCurrentBaseBehaviors": 1,
    "mappedPdfTaggedTableSectionOrderingCurrentBaseBehaviors": 1,
    "pdfPageResourceInheritanceCurrentBase": "Page resource inheritance resolves ancestor fonts for leaf page content and legacy Form XObjects while keeping leaf overrides authoritative before WordPress text extraction.",
    "pdfPageResourceInheritanceCurrentBaseBehaviors": 1,
    "mappedPdfPageResourceInheritanceCurrentBaseBehaviors": 1,
    "pdfTextDictionaryLayoutOrderSuppliedRangeCurrentBase": "SuppliedDocumentConverter slices full-document lowres_images, layout_results, order_images, and order_results to the selected pdftext page_range before layout/order assignment, so skipped cover or appendix artifacts cannot drive WordPress paragraph order.",
    "pdfTextDictionaryLayoutOrderSuppliedRangeCurrentBaseBehaviors": 1,
    "mappedPdfTextDictionaryLayoutOrderSuppliedRangeCurrentBaseBehaviors": 1,
    "markerRuntimeMainPreflightBoundaryCurrentBase": "Batch convert.py::main admission records absolute input/output folder normalization, os.makedirs(exist_ok=True) output-folder creation requirements and regular-file conflict boundaries, chunk/max slicing, metadata-file basename lookup, worker-count clamping, empty-task queue risk, invalid-worker pool-risk boundaries, and stdout conversion-summary ordering before task tuple construction or pool launch.",
    "markerRuntimeMainPreflightBoundaryCurrentBaseBehaviors": 3,
    "mappedMarkerRuntimeMainPreflightBoundaryCurrentBaseBehaviors": 3,
    "markerRuntimeNegativeChunkIndexBoundaryCurrentBase": "Batch convert.py::main negative --chunk_idx values preserve raw start/end offsets and Python slice normalization before metadata lookup, task tuple construction, worker-count clamping, and empty-task queue review.",
    "markerRuntimeNegativeChunkIndexBoundaryCurrentBaseBehaviors": 1,
    "mappedMarkerRuntimeNegativeChunkIndexBoundaryCurrentBaseBehaviors": 1,
    "tableNamedBboxGeometryBoundaryCurrentBase": "Supplied table rows, columns, cells, and OCR-line geometry accept named bbox fields such as x1/y1/x2/y2, x_start/y_start/x_end/y_end, and left/top/right/bottom before assignment and crop-boundary review.",
    "tableNamedBboxGeometryBoundaryCurrentBaseBehaviors": 1,
    "mappedTableNamedBboxGeometryBoundaryCurrentBaseBehaviors": 1,
    "pdfFontType3CharProcsFallbackBoundaryCurrentBase": "Exact-generation Type3 CharProc streams are excluded from stream-only fallback WordPress text extraction, so glyph-program payload text cannot leak while real fallback content streams remain visible.",
    "pdfFontType3CharProcsFallbackBoundaryCurrentBaseBehaviors": 1,
    "mappedPdfFontType3CharProcsFallbackBoundaryCurrentBaseBehaviors": 1,
    "pdfLinkAnnotationPageGeometryCurrentBase": "Link annotation rectangles preserve raw PDF page-space /Rect values while adding pdftext display-space rectangles through inherited CropBox, Rotate, and page-local UserUnit before WordPress span promotion.",
    "pdfLinkAnnotationPageGeometryCurrentBaseBehaviors": 1,
    "mappedPdfLinkAnnotationPageGeometryCurrentBaseBehaviors": 1,
    "pdfParserStreamFilterStackBoundaryCurrentBase": "Missing-Length and stale declared-Length page content streams with ordered filter stacks decode the complete ASCII85-to-Flate chain before accepting endstream boundaries, excluding fake encoded endstream markers from WordPress text.",
    "pdfParserStreamFilterStackBoundaryCurrentBaseBehaviors": 2,
    "mappedPdfParserStreamFilterStackBoundaryCurrentBaseBehaviors": 2,
    "pdfInlineImageDecodeBoundaryCurrentBase": "Inline-image candidates now require explicit ASCII85 end markers before accepting delimiter-looking EI bytes and still decode Flate DecodeParms predictor payloads before boundary acceptance, keeping image payload bytes out of WordPress paragraphs.",
    "pdfInlineImageDecodeBoundaryCurrentBaseBehaviors": 2,
    "mappedPdfInlineImageDecodeBoundaryCurrentBaseBehaviors": 2,
    "pdfAcroFormFieldsGenerationBoundaryCurrentBase": "AcroForm field roots, field Kids, page Widget annotations, Widget Parent links, catalog AcroForm, and page-tree Kids resolve only when object generations match the N G R reference, excluding stale same-object-number field values while preserving exact nonzero-generation widgets for WordPress form review.",
    "pdfAcroFormFieldsGenerationBoundaryCurrentBaseBehaviors": 2,
    "mappedPdfAcroFormFieldsGenerationBoundaryCurrentBaseBehaviors": 2,
    "mappedPdfPagePartialExtractionDiagnosticsCurrentBaseBehaviors": 2,
    "pdfPagePartialExtractionDiagnosticsCurrentBase": "Page-level /Contents partial-extraction diagnostics report null and empty page content, malformed array operands, and non-stream content references with page/object metadata while preserving text from valid streams and keeping non-stream payload text out of visible output.",
    "pdfPagePartialExtractionDiagnosticsCurrentBaseBehaviors": 2
}
