Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions src/sourmash_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"Python utilities for sourmash plugins and scripts."
import sourmash
from sourmash.picklist import PickStyle
from sourmash import sourmash_args
from sourmash.cli import utils as sourmash_cli

Expand Down Expand Up @@ -76,13 +77,16 @@ def add_standard_minhash_args(parser):
sourmash_cli.add_scaled_arg(parser)


def create_minhash_from_args(args, *, track_abundance=False, **defaults):
def create_minhash_from_args(args=None, *, ksize=None, scaled=None, moltype=None, track_abundance=False, **defaults):
default_moltype = defaults.get('moltype')
moltype = sourmash_args.calculate_moltype(args, default=default_moltype)
ksize = args.ksize or defaults.get('ksize')

if args:
moltype = sourmash_args.calculate_moltype(args, default=default_moltype)
ksize = args.ksize or defaults.get('ksize')
scaled = args.scaled or defaults.get('scaled')

if not ksize:
ksize = DEFAULTS[moltype]['ksize']
scaled = args.scaled or defaults.get('scaled')
if not scaled:
scaled = DEFAULTS[moltype]['scaled']

Expand All @@ -92,15 +96,27 @@ def create_minhash_from_args(args, *, track_abundance=False, **defaults):
track_abundance=track_abundance)


def load_index_and_select(filename, minhash_obj, *, picklist=None, pickfile=None,
                          coltype=None, colname=None, pickstyle=PickStyle.INCLUDE,
                          raise_on_empty=True, args=None):
    """Load a sourmash Index object from filename,
    selecting sketches compatible with minhash_obj.

    The selection uses the ksize, moltype, scaled and track_abundance
    attributes of `minhash_obj`, optionally restricted by a picklist.

    The picklist is taken from the first of these that is provided:

    1. `args` - handed to `sourmash_args.load_picklist(args)`.
    2. `pickfile` - combined with `colname`, `coltype` and `pickstyle`
       into a `pickfile:colname:coltype:pickstyle` string, which is then
       passed to `sourmash_args.load_picklist`.
    3. `picklist` - used as given (may be None for no picklist).

    If the selection is empty and `raise_on_empty` is true (the default),
    a ValueError is raised; otherwise the empty result is returned.
    """
    idx = sourmash.load_file_as_index(filename)

    # Resolve the picklist; precedence is args > pickfile > picklist.
    if args is not None:
        pl = sourmash_args.load_picklist(args)
    elif pickfile:
        picklist_arg = f"{pickfile}:{colname}:{coltype}:{pickstyle.name.lower()}"
        # NOTE(review): confirm that load_picklist() accepts a `picklist=`
        # keyword; the call just above passes a single args object instead.
        pl = sourmash_args.load_picklist(picklist=picklist_arg)
    else:
        pl = picklist

    idx = idx.select(ksize=minhash_obj.ksize,
                     moltype=minhash_obj.moltype,
                     scaled=minhash_obj.scaled,
                     abund=minhash_obj.track_abundance,
                     picklist=pl)

    # Only treat an empty selection as an error when the caller asked for it.
    if raise_on_empty and not idx:
        raise ValueError(f"no matching sketches in '{filename}' for k={minhash_obj.ksize} moltype={minhash_obj.moltype} scaled={minhash_obj.scaled}")
    return idx
10 changes: 10 additions & 0 deletions tests/podar-ref.picklist.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
accession,taxid,superkingdom,phylum,class,order,family,genus,species,strain
AE000782.1,224325,Archaea,Euryarchaeota,Archaeoglobi,Archaeoglobales,Archaeoglobaceae,Archaeoglobus,Archaeoglobus fulgidus,Archaeoglobus fulgidus DSM 4304
NC_000909.1,243232,Archaea,Euryarchaeota,Methanococci,Methanococcales,Methanocaldococcaceae,Methanocaldococcus,Methanocaldococcus jannaschii,Methanocaldococcus jannaschii DSM 2661
NC_003272.1,103690,Bacteria,Cyanobacteria,,Nostocales,Nostocaceae,Nostoc,Nostoc sp. PCC 7120,
AE009441.1,178306,Archaea,Crenarchaeota,Thermoprotei,Thermoproteales,Thermoproteaceae,Pyrobaculum,Pyrobaculum aerophilum,Pyrobaculum aerophilum str. IM2
AE009950.1,186497,Archaea,Euryarchaeota,Thermococci,Thermococcales,Thermococcaceae,Pyrococcus,Pyrococcus furiosus,Pyrococcus furiosus DSM 3638
AE009951.2,190304,Bacteria,Fusobacteria,Fusobacteriia,Fusobacteriales,Fusobacteriaceae,Fusobacterium,Fusobacterium nucleatum,
AE010299.1,188937,Archaea,Euryarchaeota,Methanomicrobia,Methanosarcinales,Methanosarcinaceae,Methanosarcina,Methanosarcina acetivorans,Methanosarcina acetivorans C2A
AE009439.1,190192,Archaea,Euryarchaeota,Methanopyri,Methanopyrales,Methanopyraceae,Methanopyrus,Methanopyrus kandleri,Methanopyrus kandleri AV19
NC_003911.12,246200,Bacteria,Proteobacteria,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Ruegeria,Ruegeria pomeroyi,Ruegeria pomeroyi DSS-3
Binary file added tests/podar-ref.zip
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/test_sourmash_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from sourmash_utils import *
from sourmash_utils import create_minhash_from_args, load_index_and_select

import os


def test_basic():
Expand Down Expand Up @@ -59,3 +62,24 @@ def test_minhash_create_skipm2n3():
assert mh.moltype == 'skipm2n3'
assert mh.scaled == 1000
assert mh.ksize == 21


def test_load_index_and_select():
    """Load podar-ref.zip from the tests directory and select without a picklist."""
    test_dir = os.path.dirname(__file__)
    zip_path = os.path.join(test_dir, 'podar-ref.zip')

    # dna= and abund= are not named parameters of create_minhash_from_args,
    # so they are collected by its **defaults; kept exactly as the call had them.
    query_mh = create_minhash_from_args(
        ksize=31,
        scaled=1000,
        moltype='DNA',
        dna=True,
        abund=True,
    )

    selected = load_index_and_select(filename=zip_path, minhash_obj=query_mh)
    assert selected

def test_load_index_and_select_picklist():
    """Load podar-ref.zip and select through the CSV picklist file."""
    test_dir = os.path.dirname(__file__)
    zip_path = os.path.join(test_dir, 'podar-ref.zip')
    picklist_path = os.path.join(test_dir, 'podar-ref.picklist.csv')

    # abund= is not a named parameter of create_minhash_from_args, so it is
    # collected by its **defaults; kept exactly as the call had it.
    query_mh = create_minhash_from_args(
        ksize=31,
        scaled=1000,
        moltype='DNA',
        abund=True,
    )

    selected = load_index_and_select(
        filename=zip_path,
        minhash_obj=query_mh,
        pickfile=picklist_path,
        coltype='ident',
        colname='accession',
    )

    assert selected
    # the selection is expected to contain exactly nine sketches
    assert len(selected) == 9