Skip to content

Commit 00f7e6a

Browse files
Copilot and WeiTing1991 committed
Revert data separation changes based on feedback - keep data in package
Co-authored-by: WeiTing1991 <[email protected]>
1 parent 08f31ad commit 00f7e6a

File tree

5 files changed

25 additions and 113 deletions

5 files changed

25 additions and 113 deletions

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,3 @@ temp/**
125125
docs/api/generated/
126126

127127
conda.recipe/
128-
129-
# Data directory - excluded from package, available separately as data archive
130-
data/

DATA_ARCHIVE.md

Lines changed: 0 additions & 46 deletions
This file was deleted.

pyproject.toml

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,10 +89,6 @@ allow-direct-references = true
8989
packages = [
9090
"src/dcs",
9191
]
92-
exclude = [
93-
"data/",
94-
"data/**/*",
95-
]
9692

9793
[tool.ruff]
9894
indent-width = 2

src/dcs/utils/data_processing.py

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,26 +14,22 @@ def __init__(self, filename: str, data: dict | None = None):
1414
"""Initialize with filename and optional data.
1515
1616
Args:
17-
filename: Base filename for exported files.
18-
data: Initial data dictionary. Defaults to None.
17+
filename (str): Base filename for exported files.
18+
data (dict, optional): Initial data dictionary. Defaults to None.
1919
"""
2020
# Date
2121
now_data = datetime.now().date().strftime("%Y%m%d")
2222
here = os.path.dirname(__file__)
2323
home = os.path.abspath(os.path.join(here, "../../../"))
24-
data_dir = os.path.abspath(os.path.join(home, "data"))
24+
data = os.path.abspath(os.path.join(home, "data"))
2525

2626
self.__date = now_data
2727
self.default_filename = self.__date + "_" + filename
2828

29-
self.__data = data_dir
29+
self.__data = data
3030
json_dir = os.path.join(self.__data, "json")
3131
csv_dir = os.path.join(self.__data, "csv")
3232

33-
# Create directories if they don't exist
34-
os.makedirs(json_dir, exist_ok=True)
35-
os.makedirs(csv_dir, exist_ok=True)
36-
3733
self.filepath_json = json_dir
3834
self.filepath_csv = csv_dir
3935

@@ -42,16 +38,31 @@ def __init__(self, filename: str, data: dict | None = None):
4238

4339
@property
4440
def data_dict(self) -> dict:
45-
"""Get the current data dictionary."""
41+
"""Get the current data dictionary.
42+
43+
Returns:
44+
dict: The stored data dictionary.
45+
"""
4646
return self.data
4747

4848
@data_dict.setter
4949
def update_data(self, new_data: dict) -> None:
50-
"""Update the data dictionary."""
50+
"""Update the data dictionary.
51+
52+
Args:
53+
new_data (dict): New data dictionary to store.
54+
"""
5155
self.data = new_data
5256

5357
def __is_file_existed(self, filepath: str) -> bool:
54-
"""Check if file already exists."""
58+
"""Check if file already exists.
59+
60+
Args:
61+
filepath (str): Path to the file to check.
62+
63+
Returns:
64+
bool: True if file exists, False otherwise.
65+
"""
5566
if os.path.isfile(filepath):
5667
return True
5768
else:
@@ -84,7 +95,7 @@ def write_dict_to_csv(self, header: list) -> None:
8495
"""Export data dictionary to CSV file.
8596
8697
Args:
87-
header: List of column headers for the CSV file.
98+
header (list): List of column headers for the CSV file.
8899
89100
Raises:
90101
Exception: If the file already exists.

tasks.py

Lines changed: 2 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -55,12 +55,8 @@ def docs_clean(ctx):
5555

5656

5757
@task
58-
def build(ctx, with_data: bool = False):
59-
"""Build the package for distribution.
60-
61-
Args:
62-
with_data: If True, also creates the separate data archive
63-
"""
58+
def build(ctx):
59+
"""Build the package for distribution."""
6460
print("Building package...")
6561

6662
# Clean first
@@ -70,48 +66,6 @@ def build(ctx, with_data: bool = False):
7066
ctx.run("python -m build --no-isolation", pty=True)
7167
print("Package built successfully!")
7268

73-
if with_data:
74-
package_data(ctx)
75-
76-
77-
@task
78-
def package_data(ctx):
79-
"""Create a separate data archive for sample datasets and configurations.
80-
81-
Packages the data directory into a compressed archive that users can download
82-
separately to reduce the main package size.
83-
"""
84-
import tarfile
85-
from pathlib import Path
86-
87-
print("Creating data archive...")
88-
89-
# Clean first
90-
ctx.run("rm -f dist/dcs-sample-data-*.tar.gz", pty=True, warn=True)
91-
92-
# Create dist directory if it doesn't exist
93-
Path("dist").mkdir(exist_ok=True)
94-
95-
# Get version for archive naming
96-
version = "0.1.5" # Could be extracted from pyproject.toml
97-
archive_name = f"dist/dcs-sample-data-{version}.tar.gz"
98-
99-
# Create archive
100-
with tarfile.open(archive_name, "w:gz") as tar:
101-
tar.add("data", arcname="dcs-sample-data")
102-
103-
print(f"Data archive created: {archive_name}")
104-
105-
# Show archive contents summary
106-
with tarfile.open(archive_name, "r:gz") as tar:
107-
members = tar.getmembers()
108-
print(f"Archive contains {len(members)} files and directories")
109-
print("Archive contents preview:")
110-
for member in members[:10]: # Show first 10 entries
111-
print(f" {member.name}")
112-
if len(members) > 10:
113-
print(f" ... and {len(members) - 10} more files")
114-
11569

11670
@task
11771
def build_clean(ctx):

0 commit comments

Comments
 (0)