Python API Reference

Stata-MCP provides a Python SDK for programmatic access to Stata functionality. This is useful for building custom agents, integrating with other Python applications, or scripting Stata workflows.

Installation

pip install stata-mcp

Quick Start

from stata_mcp.api import (
    stata_do,
    get_data_info,
    ado_package_install,
    read_log,
    stata_help,
    write_dofile,
)

# Get data information
info = get_data_info("/path/to/data.dta")

# Install a package
result = ado_package_install("outreg2", source="ssc")

# Execute a do-file
log = stata_do("/path/to/analysis.do")

# Read log file
content = read_log("/path/to/output.log")

Runtime Context

The RuntimeContext provides configuration and path management for all API calls.

from stata_mcp.api import RuntimeContext, create_runtime_context

# Create a runtime context
runtime = create_runtime_context()

# With custom config file
runtime = create_runtime_context(config_file="/path/to/config.toml")

# With Stata CLI required (raises error if not found)
runtime = create_runtime_context(require_stata=True)

# Access runtime properties
print(runtime.cwd)              # Current working directory
print(runtime.stata_cli)        # Stata executable path
print(runtime.log_base_path)    # Log directory
print(runtime.dofile_base_path) # Do-file directory
print(runtime.is_unix)          # Is macOS/Linux

RuntimeContext Properties:

| Property | Type | Description |
| --- | --- | --- |
| config | Config | Configuration object |
| cwd | Path | Current working directory |
| stata_cli | str \| None | Stata executable path |
| output_base_path | Path | Base output directory |
| log_base_path | Path | Log file directory |
| dofile_base_path | Path | Do-file directory |
| tmp_base_path | Path | Temporary files directory |
| is_unix | bool | Is Unix-like system (macOS/Linux) |

API Functions

stata_do()

Execute a Stata do-file and optionally return log content.

def stata_do(
    dofile_path: str,
    log_file_name: str = None,
    is_read_log: bool = True,
    is_replace_log: bool = True,
    enable_smcl: bool = True,
    config_file: str | Path | None = None,
) -> Dict[str, Any]:
    ...

Parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| dofile_path | str | required | Path to do-file |
| log_file_name | str | None | Custom log filename (without extension) |
| is_read_log | bool | True | Read log content after execution |
| is_replace_log | bool | True | Replace existing log file |
| enable_smcl | bool | True | Generate SMCL format log |
| config_file | str \| Path | None | Custom config file path |

Returns: Dict[str, Any]

{
    "log_file_path": {
        "text": "/path/to/output.log",
        "smcl": "/path/to/output.smcl"  # if enable_smcl=True
    },
    "log_content": {
        "text": "..."  # log content if is_read_log=True
    }
}

Example:

# Basic execution
result = stata_do("/project/analysis.do")
print(result["log_content"]["text"])

# With custom log name
result = stata_do(
    "/project/analysis.do",
    log_file_name="my_results",
    enable_smcl=False,
)

# Error handling
if "error" in result:
    print(f"Execution failed: {result['error']}")

get_data_info()

Return descriptive statistics for a supported dataset.

def get_data_info(
    data_path: str,
    vars_list: List[str] | None = None,
    encoding: str = "utf-8",
    config_file: str | Path | None = None,
) -> str:
    ...

Parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| data_path | str | required | Path to data file |
| vars_list | List[str] | None | Variables to analyze (all if None) |
| encoding | str | "utf-8" | Text encoding for text-based files |
| config_file | str \| Path | None | Custom config file path |

Returns: str (JSON string)

Supported Formats:

- Stata: .dta
- CSV/Text: .csv, .tsv, .psv
- Excel: .xlsx, .xls
- SPSS: .sav, .zsav

Example:

import json

# Get all variables
info_json = get_data_info("/project/data/survey.dta")
info = json.loads(info_json)

# Get specific variables
info_json = get_data_info(
    "/project/data/panel.csv",
    vars_list=["gdp", "inflation", "unemployment"],
    encoding="utf-8",
)

# Access the result (parse the JSON string first)
info = json.loads(info_json)
print(info["overview"]["obs"])  # Number of observations
print(info["vars_detail"].keys())  # Variable names

ado_package_install()

Install an ado package from SSC, net, or GitHub.

def ado_package_install(
    package: str,
    source: str = "ssc",
    is_replace: bool = True,
    package_source_from: str = None,
    config_file: str | Path | None = None,
    timeout: int = 300,
) -> str:
    ...

Parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| package | str | required | Package name or user/repo for GitHub |
| source | str | "ssc" | Installation source: ssc / net / github |
| is_replace | bool | True | Replace existing package |
| package_source_from | str | None | Source URL for net installations |
| config_file | str \| Path | None | Custom config file path |
| timeout | int | 300 | Timeout in seconds |

Returns: str (installation log or error message)

Example:

# Install from SSC
result = ado_package_install("outreg2")

# Install from GitHub
result = ado_package_install("SepineTam/TexIV", source="github")

# Install from network
result = ado_package_install(
    "custompkg",
    source="net",
    package_source_from="https://example.com/stata/",
)

# Install without replacing an existing copy of the package
result = ado_package_install("estout", is_replace=False)

read_log()

Read a Stata log file.

def read_log(
    file_path: str,
    encoding: str = "utf-8",
    is_beta: bool = False,
    *,
    output_format: Literal["full", "core", "dict"] = "dict",
    config_file: str | Path | None = None,
) -> str:
    ...

Parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| file_path | str | required | Path to log file (.log or .smcl) |
| encoding | str | "utf-8" | File encoding |
| is_beta | bool | False | Enable structured parsing |
| output_format | str | "dict" | Output format: full / core / dict |
| config_file | str \| Path | None | Custom config file path |

Returns: str

Output Formats:

- full: Raw log content
- core: Cleaned content without framework lines
- dict: Structured command-result pairs (string representation)

Example:

# Read log content
content = read_log("/project/logs/analysis.log")

# Get clean output
content = read_log(
    "/project/logs/analysis.log",
    is_beta=True,
    output_format="core",
)

stata_help()

Get Stata command documentation.

Note: Available on macOS and Linux only.

def stata_help(
    cmd: str,
    config_file: str | Path | None = None,
) -> str:
    ...

Parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| cmd | str | required | Stata command name |
| config_file | str \| Path | None | Custom config file path |

Returns: str (help text)

Example:

# Get help for a command
help_text = stata_help("regress")
print(help_text)

# Panel data commands
help_text = stata_help("xtreg")

write_dofile()

Create a do-file with Stata commands.

Note: This is a convenience utility. Agents that have native file-writing capabilities can create do-files directly instead.

def write_dofile(
    content: str,
    encoding: str | None = None,
    config_file: str | Path | None = None,
) -> str:
    ...

Parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| content | str | required | Stata commands |
| encoding | str | None | File encoding (defaults to UTF-8) |
| config_file | str \| Path | None | Custom config file path |

Returns: str (path to created do-file)

Example:

# Create a do-file
dofile_path = write_dofile("""
use "/data/survey.dta", clear
regress income age education
estat hettest
""")

# Execute it
result = stata_do(dofile_path)

Error Handling

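API functions report failures through their return values rather than by raising exceptions: stata_do() returns a dict containing an "error" key, while string-returning functions such as get_data_info() return the error message itself. (The one exception shown in this reference is create_runtime_context(require_stata=True), which raises an error if Stata is not found.)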

import json

# Check for errors in stata_do
result = stata_do("/path/to/analysis.do")
if "error" in result:
    print(f"Error: {result['error']}")
    # Handle error
else:
    print(result["log_content"]["text"])

# get_data_info returns error as string
info = get_data_info("/path/to/data.xyz")
if info.startswith("Unsupported") or info.startswith("Failed"):
    print(f"Error: {info}")
else:
    data = json.loads(info)

Integration Examples

Building a Custom Agent

from stata_mcp.api import stata_do, get_data_info

def analyze_dataset(data_path: str, dofile_path: str):
    # 1. Inspect data
    info = get_data_info(data_path)

    # 2. Execute analysis
    result = stata_do(dofile_path, is_read_log=True)

    # 3. Return results
    return {
        "data_info": info,
        "analysis_log": result.get("log_content", {}).get("text", ""),
    }

Batch Processing

from pathlib import Path

from stata_mcp.api import stata_do, write_dofile

datasets = ["wave1.dta", "wave2.dta", "wave3.dta"]

for dataset in datasets:
    # Drop the ".dta" extension so output and log names stay clean
    # (log_file_name is expected without an extension)
    name = Path(dataset).stem

    # Generate do-file for each dataset
    dofile = write_dofile(f"""
        use "/data/{dataset}", clear
        regress y x1 x2 x3
        outreg2 using "/output/{name}.xls", replace
    """)

    # Execute
    stata_do(dofile, log_file_name=f"analysis_{name}")