Skip to content

API Reference

Helper Functions

PyArchery: Python binding to the Archery document parsing library.

load(file_path, encoding='UTF-8', model=None, hints=None, recipe=None, tag_case=None)

Load a document and create a DocumentWrapper (starts JVM on first use).

Source code in src/pyarchery/__init__.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def load(
    file_path: str | os.PathLike[str],
    encoding: str = "UTF-8",
    model=None,
    hints=None,
    recipe=None,
    tag_case: str | None = None,
) -> DocumentWrapper:
    """Load a document and create a DocumentWrapper (starts JVM on first use).

    Args:
        file_path: Path of the document to load; must be an existing,
            readable regular file.
        encoding: Encoding name handed to the document factory.
        model: Optional model; applied via ``setModel`` when truthy.
        hints: Optional hints; applied via ``setHints`` when truthy.
        recipe: Optional recipe. Either a single string, used as-is, or an
            iterable of strings that is joined with newlines.
        tag_case: ``"SNAKE"`` or ``"CAMEL"`` to select the tag style. Any
            other value leaves the tag classifier untouched.

    Returns:
        The loaded document wrapped in a DocumentWrapper.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
        PermissionError: If the file exists but is not readable.
    """
    archery = _archery()
    file_path_str = os.fspath(file_path)
    if not os.path.isfile(file_path_str):
        raise FileNotFoundError(f"Document not found: {file_path_str}")
    if not os.access(file_path_str, os.R_OK):
        raise PermissionError(f"Document is not readable: {file_path_str}")

    doc = archery.DocumentFactory.createInstance(file_path_str, encoding)
    if model:
        doc.setModel(model)
    if hints:
        doc.setHints(hints)
    if recipe:
        # A plain string is itself iterable: joining it would insert a newline
        # between every character, so pass it through unchanged.
        if isinstance(recipe, str):
            recipe_text = recipe
        else:
            recipe_text = "\n".join(recipe)
        doc.setRecipe(recipe_text)

    # Only the two recognised names change the tag style; anything else
    # (including None and "") is ignored.
    if tag_case == "SNAKE":
        doc.getTagClassifier().setTagStyle(archery.SNAKE)
    elif tag_case == "CAMEL":
        doc.getTagClassifier().setTagStyle(archery.CAMEL)
    return _document_wrapper()(doc)

model_from_json(data)

Create a ModelBuilder from a JSON string (starts JVM on first use).

Source code in src/pyarchery/__init__.py
61
62
63
def model_from_json(data: str) -> ModelBuilder:
    """Build a ModelBuilder from the JSON text in ``data``.

    The JVM is started on first use.
    """
    builder = _archery().ModelBuilder()
    return builder.fromJSON(data)

model_from_path(path)

Create a ModelBuilder from a file path (starts JVM on first use).

Source code in src/pyarchery/__init__.py
51
52
53
def model_from_path(path: str | os.PathLike[str]) -> ModelBuilder:
    """Build a ModelBuilder from the model file at ``path``.

    ``path`` may be a string or any path-like object; it is converted with
    ``os.fspath`` before being handed to the builder. The JVM is started on
    first use.
    """
    builder = _archery().ModelBuilder()
    return builder.fromPath(os.fspath(path))

model_from_url(url)

Create a ModelBuilder from a URL (starts JVM and fetches remote model).

Source code in src/pyarchery/__init__.py
56
57
58
def model_from_url(url: str) -> ModelBuilder:
    """Build a ModelBuilder from the model located at ``url``.

    Starts the JVM and fetches the remote model.
    """
    builder = _archery().ModelBuilder()
    return builder.fromURL(url)

Core Defines

Constants and enumerations for the PyArchery library.

This module defines constants used for document processing hints and tag classification styles, mapping them directly from the underlying Java Archery Framework.

CAMEL = TagClassifier_.TagStyle.CAMEL module-attribute

Camel case tag style.

INTELLI_EXTRACT = Document_.Hint.INTELLI_EXTRACT module-attribute

Hint to enable intelligent extraction.

INTELLI_LAYOUT = Document_.Hint.INTELLI_LAYOUT module-attribute

Hint to enable intelligent layout analysis.

INTELLI_TAG = Document_.Hint.INTELLI_TAG module-attribute

Hint to enable intelligent tagging.

INTELLI_TIME = Document_.Hint.INTELLI_TIME module-attribute

Hint to enable time-based intelligence.

NONE = TagClassifier_.TagStyle.NONE module-attribute

No specific tag style.

SNAKE = TagClassifier_.TagStyle.SNAKE module-attribute

Snake case tag style.