Skip to content

Commit d4e5de5

Browse files
author
Aaron Su
committed
refactor: sample dataset location
1 parent a1d0c49 commit d4e5de5

File tree

182 files changed

+10
-17
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

182 files changed

+10
-17
lines changed

.gitattributes

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# Only track large binary files in sample_dataset to allow selective downloads
33

44
# Track specific file types in sample_dataset only
5-
training/gr00t/sample_dataset/**/*.mp4 filter=lfs diff=lfs merge=lfs -text
6-
training/gr00t/sample_dataset/**/*.parquet filter=lfs diff=lfs merge=lfs -text
7-
training/gr00t/sample_dataset/**/*.jsonl filter=lfs diff=lfs merge=lfs -text
5+
training/sample_dataset/**/*.mp4 filter=lfs diff=lfs merge=lfs -text
6+
training/sample_dataset/**/*.parquet filter=lfs diff=lfs merge=lfs -text
7+
training/sample_dataset/**/*.jsonl filter=lfs diff=lfs merge=lfs -text

training/gr00t/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ ENV DEBIAN_FRONTEND=noninteractive
1313
# System dependencies - consolidated for better layer caching
1414
RUN apt-get update && apt-get install -y --no-install-recommends \
1515
# Core utilities
16-
wget curl ca-certificates \
16+
wget curl ca-certificates unzip \
1717
# Git and version control
1818
git git-lfs \
1919
# Build essentials

training/gr00t/README.md

Lines changed: 0 additions & 1 deletion

training/gr00t/infra/README.md

Lines changed: 4 additions & 10 deletions

training/gr00t/run_finetune_workflow.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# 1) DATASET_LOCAL_DIR (use dataset bundled or pre-mounted)
1414
# 2) DATASET_S3_URI (sync from S3 URI s3://bucket/path)
1515
# 3) HF_DATASET_ID (download from Hugging Face Datasets)
16-
# 4) Sample dataset (git clone with Git LFS): /workspace/sample-embodied-ai-platform/training/gr00t/sample_dataset
16+
# 4) Sample dataset (git clone with Git LFS): /workspace/sample-embodied-ai-platform/training/sample_dataset
1717

1818
set -e # Exit on any error
1919

@@ -147,7 +147,7 @@ mkdir -p "$OUTPUT_DIR" || true
147147

148148
# Resolve dataset source according to priority and ensure accessibility
149149
SAMPLE_REPO_DIR="/workspace/sample-embodied-ai-platform"
150-
DEFAULT_SAMPLE_DATASET_DIR="$SAMPLE_REPO_DIR/training/gr00t/sample_dataset"
150+
DEFAULT_SAMPLE_DATASET_DIR="$SAMPLE_REPO_DIR/training/sample_dataset"
151151
RESOLVED_DATASET_DIR=""
152152

153153
echo "[Step] Resolve dataset source (priority: local -> s3 -> hf -> sample)"
File renamed without changes.

0 commit comments

Comments
 (0)