diff --git a/changelog.rst b/changelog.rst
index 772a17ee..ebb7847d 100644
--- a/changelog.rst
+++ b/changelog.rst
@@ -4,6 +4,14 @@ Changelog
The format is based on `Keep a Changelog `__.
This project adheres to `Semantic Versioning `__.
+v0.4.0 - xxxx-xx-xx
+-------------------
+
+Added
+~~~~~
+- Support for reading files via a context manager, text buffer, or string.
+
+
v0.3.1 - 2025-07-16
-------------------
diff --git a/parsnip/_errors.py b/parsnip/_errors.py
index d37f76cc..25f4cf99 100644
--- a/parsnip/_errors.py
+++ b/parsnip/_errors.py
@@ -1,6 +1,27 @@
# Copyright (c) 2025, The Regents of the University of Michigan
# This file is from the parsnip project, released under the BSD 3-Clause License.
+from pathlib import Path
+
+
+def _is_potentially_valid_path(file: str) -> bool:
+ """Check whether a file string could possibly be intended as a path.
+
+ This function returns True if the path's suffixes include ".cif", if the path
+ exists on disk, or if the path's parent is an existing directory other than
+ ".". If checking the path raises an OSError, the function returns False.
+ """
+ try:
+ path = Path(file)
+ return (
+ ".cif" in path.suffixes # Probably intended to parse as file
+ or path.exists() # If it is a file, we definitely want to parse that
+ # Possibly a typo, but we want to check that path regardless.
+ or (path.parent.is_dir() and path.parent != Path("."))
+ )
+ except OSError:
+ return False
+
class ParseWarning(Warning):
def __init__(self, message):
diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py
index 37e0b217..6ab94a0c 100644
--- a/parsnip/parsnip.py
+++ b/parsnip/parsnip.py
@@ -74,13 +74,13 @@
from fnmatch import fnmatch
from importlib.util import find_spec
from pathlib import Path
-from typing import ClassVar
+from typing import ClassVar, TextIO
import numpy as np
from more_itertools import flatten, peekable
from numpy.lib.recfunctions import structured_to_unstructured
-from parsnip._errors import ParseWarning
+from parsnip._errors import ParseWarning, _is_potentially_valid_path
from parsnip.patterns import (
_accumulate_nonsimple_data,
_box_from_lengths_and_angles,
@@ -111,7 +111,7 @@ class CifFile:
>>> from parsnip import CifFile
>>> cif = CifFile("example_file.cif")
>>> print(cif)
- CifFile(fn=example_file.cif) : 12 data entries, 2 data loops
+ CifFile(file=example_file.cif) : 12 data entries, 2 data loops
Data entries are accessible via the :attr:`~.pairs` and :attr:`~.loops` attributes:
@@ -141,21 +141,38 @@ class CifFile:
Default value = ``False``
"""
- def __init__(self, fn: str | Path, cast_values: bool = False):
- """Create a CifFile object from a filename.
+ def __init__(
+ self, file: str | Path | TextIO | Iterable[str], cast_values: bool = False
+ ):
+ """Create a CifFile object from a filename, file object, or iterable of `str`.
On construction, the entire file is parsed into key-value pairs and data loops.
Comment lines are ignored.
"""
- self._fn = fn
+ self._fn = file
self._pairs = {}
self._loops = []
self._cpat = {k: re.compile(pattern) for (k, pattern) in self.PATTERNS.items()}
self._cast_values = cast_values
- with open(fn) as file:
+ if (isinstance(file, str) and _is_potentially_valid_path(file)) or isinstance(
+ file, Path
+ ):
+ with open(file) as file:
+ self._parse(peekable(file))
+ # We expect a TextIO | IOBase, but allow users to pass any Iterable[string_like]
+ # This includes a str that does not point to a file!
+ elif isinstance(file, str):
+ msg = (
+ "\nFile input was parsed as a raw CIF data block. "
+ "If you intended to read the input string as a file path, please "
+ "ensure it is validly formatted."
+ )
+ warnings.warn(msg, RuntimeWarning, stacklevel=2)
+ self._parse(peekable(file.splitlines(True)))
+ else:
self._parse(peekable(file))
_SYMPY_AVAILABLE = find_spec("sympy") is not None
@@ -919,7 +936,7 @@ def _parse(self, data_iter: Iterable):
def __repr__(self):
n_pairs = len(self.pairs)
n_tabs = len(self.loops)
- return f"CifFile(fn={self._fn}) : {n_pairs} data entries, {n_tabs} data loops"
+ return f"CifFile(file={self._fn}) : {n_pairs} data entries, {n_tabs} data loops"
PATTERNS: ClassVar = {
"key_value_general": r"^(_[\w\.\-/\[\d\]]+)\s+([^#]+)",
diff --git a/pyproject.toml b/pyproject.toml
index eb254a84..3d19f948 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -90,9 +90,9 @@ known-first-party = ["parsnip"]
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["D", "B018", "F811"]
-"tests/conftest.py" = ["N816", "N806"]
"parsnip/*" = ["E741"]
"__init__.py" = ["F401"] # Do not remove "unused" imports in __init__.py files
+"tests/conftest.py" = ["N806", "N816"] # Allow mixed-case globals
[tool.ruff.format]
quote-style = "double"
diff --git a/tests/conftest.py b/tests/conftest.py
index 898614df..015c2c30 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,3 @@
-# ruff: noqa: N816. Allow mixed-case global variables
from __future__ import annotations
import os
@@ -37,6 +36,18 @@ def remove_invalid(s):
return s.replace("\r", "")
+def _array_assertion_verbose(keys, test_data, real_data):
+ keys = np.asarray(keys)
+ test_data = np.asarray(test_data)
+ real_data = np.asarray(real_data)
+ msg = (
+ f"Key(s) {keys[test_data != real_data]} did not match:\n"
+ f"{test_data[test_data != real_data]}!="
+ f"{real_data[test_data != real_data]}\n"
+ )
+ np.testing.assert_equal(test_data, real_data, err_msg=msg)
+
+
def _gemmi_read_keys(filename, keys, as_number=True):
try:
file_block = cif.read_file(filename).sole_block()
diff --git a/tests/test_ciffile.py b/tests/test_ciffile.py
index 1c044278..f2849f08 100644
--- a/tests/test_ciffile.py
+++ b/tests/test_ciffile.py
@@ -1,8 +1,12 @@
+# ruff: noqa: SIM115
import re
+from pathlib import Path
+import numpy as np
import pytest
-from conftest import cif_files_mark
+from conftest import _array_assertion_verbose, cif_files_mark
+from parsnip import CifFile
from parsnip._errors import ParseWarning
@@ -28,3 +32,28 @@ def test_cast_values(cif_data):
cif_data.file._pairs = uncast_pairs # Need to reset the data
assert cif_data.file.pairs == uncast_pairs
+
+
+@pytest.mark.parametrize(
+ ("input_preprocessor", "expect_warning"),
+ [
+ (lambda fn: open(fn), None), # IOBase
+ (lambda fn: fn, None), # string file path
+ (lambda fn: Path(fn), None), # Path
+ (lambda fn: open(fn).readlines(), None), # list[str]
+ (lambda fn: open(fn).read(), RuntimeWarning), # raw string
+ ],
+)
+@cif_files_mark
+def test_open_methods(cif_data, input_preprocessor, expect_warning):
+ print(type(input_preprocessor(cif_data.filename)))
+ keys = [*cif_data.file.pairs.keys()]
+ stored_data = np.asarray([*cif_data.file.pairs.values()])
+
+ if expect_warning is not None:
+ with pytest.warns(expect_warning, match="parsed as a raw CIF data block."):
+ cif = CifFile(input_preprocessor(cif_data.filename))
+ else:
+ cif = CifFile(input_preprocessor(cif_data.filename))
+
+ _array_assertion_verbose(keys, cif.get_from_pairs(keys), stored_data)
diff --git a/tests/test_key_reader.py b/tests/test_key_reader.py
index 8abf5636..ea22b246 100644
--- a/tests/test_key_reader.py
+++ b/tests/test_key_reader.py
@@ -1,5 +1,6 @@
import numpy as np
from conftest import (
+ _array_assertion_verbose,
_gemmi_read_keys,
all_files_mark,
bad_cif,
@@ -9,18 +10,6 @@
from more_itertools import flatten
-def _array_assertion_verbose(keys, test_data, real_data):
- keys = np.asarray(keys)
- test_data = np.asarray(test_data)
- real_data = np.asarray(real_data)
- msg = (
- f"Key(s) {keys[test_data != real_data]} did not match:\n"
- f"{test_data[test_data != real_data]}!="
- f"{real_data[test_data != real_data]}\n"
- )
- np.testing.assert_equal(test_data, real_data, err_msg=msg)
-
-
@all_files_mark
def test_read_key_value_pairs(cif_data):
pycif = pycifrw_or_xfail(cif_data)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 67d6b5d5..e13e3f1e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,8 @@
+from pathlib import Path
+
import pytest
-from parsnip._errors import ParseError, ParseWarning
+from parsnip._errors import ParseError, ParseWarning, _is_potentially_valid_path
def test_parse_error(capfd):
@@ -14,3 +16,19 @@ def test_parse_warning():
raise ParseWarning("TEST_WARNING_RAISED")
assert "TEST_WARNING_RAISED" in str(warning.value)
+
+
+@pytest.mark.parametrize(
+ ("path_str", "expected"),
+ [
+ (str(Path(__file__)), True), # existing file
+ (str(Path(__file__).parent / "conftest.py"), True), # real file
+ (str(Path(__file__).parent / "nonexistent.txt"), True), # parent dir exists
+ (str(Path(__file__).parent / "fake_file.cif"), True), # .cif suffix
+ (str(Path(__file__).parent / "asdf/noparent.txt"), False), # no parent
+ ("asdfasdfasd", False),
+ ("asdfasdfasd.cif", True),
+ ],
+)
+def test_is_potentially_valid_path(path_str, expected):
+ assert _is_potentially_valid_path(path_str) is expected