Skip to content

Commit 722008f

Browse files
committed
Switch to custom static lib extraction to keep duplicate object files.
1 parent b65e2c0 commit 722008f

File tree

4 files changed

+194
-12
lines changed

4 files changed

+194
-12
lines changed

Lib/__np__/darwin.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ def rename_symbols_in_file(target_lib, prefix, protected_symbols = []):
1010
import __np__.packaging
1111
__np__.packaging.install_build_tool("clang")
1212
with tempfile.TemporaryDirectory() as tmpdir:
13-
subprocess.run(["ar", "-x", target_lib], cwd=tmpdir)
13+
import __np__.tools.extract_ar
14+
__np__.tools.extract_ar.extract_archive(target_lib, tmpdir)
1415
obj_list = []
1516
known_symbols = set()
1617
unmatched_symbols = set()
@@ -64,7 +65,8 @@ def rename_init_symbol_in_file(target_lib):
6465
for chunk in iter(lambda: f.read(65536), b""):
6566
hasher.update(chunk)
6667
file_hash = hasher.hexdigest()
67-
subprocess.run(["ar", "-x", target_lib], cwd=tmpdir)
68+
import __np__.tools.extract_ar
69+
__np__.tools.extract_ar.extract_archive(target_lib, tmpdir)
6870
obj_list = []
6971
for obj in os.listdir(tmpdir):
7072
if obj.endswith(".o"):
@@ -104,7 +106,8 @@ def rename_init_symbol_in_file(target_lib):
104106

105107
def remove_symbols_in_file(target_lib, object_file, symbols):
106108
with tempfile.TemporaryDirectory() as tmpdir:
107-
subprocess.run(["ar", "-x", target_lib], cwd=tmpdir)
109+
import __np__.tools.extract_ar
110+
__np__.tools.extract_ar.extract_archive(target_lib, tmpdir)
108111

109112
obj_list = [os.path.join(tmpdir, x) for x in os.listdir(tmpdir) if x.endswith(".o")]
110113

Lib/__np__/linux.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def rename_symbols_in_file(target_lib, prefix, protected_symbols=[]):
4949
import __np__.packaging
5050
__np__.packaging.install_build_tool("clang")
5151
with tempfile.TemporaryDirectory() as tmpdir:
52-
run("ar", "-x", target_lib, cwd=tmpdir)
52+
import __np__.tools.extract_ar
53+
__np__.tools.extract_ar.extract_archive(target_lib, tmpdir)
5354
obj_list = []
5455
known_symbols = set()
5556
unmatched_symbols = set()
@@ -109,7 +110,8 @@ def rename_init_symbol_in_file(target_lib):
109110
hasher.update(chunk)
110111
file_hash = hasher.hexdigest()
111112

112-
run("ar", "-x", target_lib, cwd=tmpdir)
113+
import __np__.tools.extract_ar
114+
__np__.tools.extract_ar.extract_archive(target_lib, tmpdir)
113115
obj_list_paths = []
114116
obj_filenames = []
115117

@@ -157,7 +159,8 @@ def rename_init_symbol_in_file(target_lib):
157159

158160
def remove_symbols_in_file(target_lib, object_file_to_modify, symbols_to_remove):
159161
with tempfile.TemporaryDirectory() as tmpdir:
160-
run("ar", "-x", target_lib, cwd=tmpdir)
162+
import __np__.tools.extract_ar
163+
__np__.tools.extract_ar.extract_archive(target_lib, tmpdir)
161164

162165
all_obj_filenames = [fn for fn in os.listdir(tmpdir) if fn.endswith(".o")]
163166
target_obj_path = os.path.join(tmpdir, object_file_to_modify)

Lib/__np__/tools/extract_ar.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
import os
5+
import sys
6+
import struct
7+
8+
AR_MAGIC = b'!<arch>\n'  # 8-byte signature compared against the first bytes of the archive
9+
FAT_MAGIC_BE = b'\xca\xfe\xba\xbe'  # leading bytes that make extract_archive reject the file as a macOS universal ('fat') binary
10+
FAT_MAGIC_LE = b'\xbe\xba\xfe\xca'  # same four bytes in reverse order, rejected the same way
11+
MEMBER_HEADER_SIZE = 60  # number of bytes read and parsed for every member header
12+
13+
14+
def _make_unique_filename(base_path):
15+
"""
16+
Generates a unique filename if the base path already exists.
17+
Appends '_1', '_2', etc., before the extension until an unused name is found.
18+
"""
19+
if not os.path.exists(base_path):
20+
return base_path
21+
22+
directory, filename = os.path.split(base_path)
23+
name, ext = os.path.splitext(filename)
24+
25+
counter = 1
26+
while True:
27+
new_filename = f"{name}_{counter}{ext}"
28+
new_path = os.path.join(directory, new_filename)
29+
if not os.path.exists(new_path):
30+
return new_path
31+
counter += 1
32+
33+
34+
class ArchiveMemberHeader:
    """Parsed form of one fixed-size 'ar' member header.

    Field layout (byte offsets): name 0-16, mtime 16-28, uid 28-34,
    gid 34-40, mode 40-48 (octal), size 48-58, end-of-header magic 58-60.

    Attributes:
        raw_name: The 16 unprocessed name bytes (padding included).
        mtime, uid, gid, mode: Integer metadata; 0 when the field is blank.
        size: Size in bytes of the data following the header.
        fmag: The two trailing magic bytes.

    Raises:
        ValueError: If the header has the wrong length, the trailing magic
            is wrong, a numeric field is malformed, or the size is missing
            or negative.
    """

    def __init__(self, header_bytes):
        if len(header_bytes) != MEMBER_HEADER_SIZE:
            raise ValueError("Header must be 60 bytes long.")

        # Validate the magic first: if we are misaligned, this gives a far
        # clearer diagnostic than a failed int() on random bytes.
        self.fmag = header_bytes[58:60]
        if self.fmag != b'\x60\x0a':
            raise ValueError("Invalid member header magic value.")

        self.raw_name = header_bytes[0:16]
        # Some producers (notably Microsoft tools, per the PE/COFF archive
        # format description) leave uid/gid and other metadata fields blank,
        # so blank optional fields are read as 0 instead of raising.
        self.mtime = self._parse_optional_int(header_bytes[16:28])
        self.uid = self._parse_optional_int(header_bytes[28:34])
        self.gid = self._parse_optional_int(header_bytes[34:40])
        self.mode = self._parse_optional_int(header_bytes[40:48], 8)

        # The size is mandatory: without it the next member cannot be found.
        self.size = int(header_bytes[48:58].strip())
        if self.size < 0:
            raise ValueError("Negative member size.")

    @staticmethod
    def _parse_optional_int(field, base=10):
        """Parse a space-padded numeric header field; blank means 0."""
        text = field.strip()
        return int(text, base) if text else 0
50+
51+
52+
def _long_name_at(long_names_table, offset):
    """Return the raw bytes of the long-name entry starting at *offset*.

    GNU/SysV tables end each entry with a slash followed by a newline, some
    variants with a bare newline, and COFF (.lib) tables with a NUL byte.
    The entry ends at whichever terminator comes first, or at the end of
    the table when none is present.
    """
    end = len(long_names_table)
    for terminator in (b'/\n', b'\n', b'\x00'):
        found = long_names_table.find(terminator, offset)
        if found != -1:
            end = min(end, found)
    return long_names_table[offset:end]


def extract_archive(archive_path, output_dir="."):
    """Extract every regular member of a static 'ar' archive.

    Unlike ``ar -x``, members that share a name are all kept: later
    duplicates are written under a numbered name chosen by
    ``_make_unique_filename``.  Symbol tables and the long-name table are
    consumed but not written.  Member names are reduced to their final path
    component so that nothing is ever written outside *output_dir*.

    Supported name encodings: short names, BSD ``#1/<len>`` extended names,
    and GNU/COFF ``/<offset>`` references into the ``//`` long-name table.

    Args:
        archive_path: Path of the archive to read.
        output_dir: Directory to extract into; created if missing.

    Returns:
        True on success, False if the archive is missing, unreadable, not an
        'ar' archive, or structurally invalid.  Diagnostics go to stderr.
        A failure to write a single member is reported but does not abort
        the extraction of the remaining members.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error: Could not create output directory '{output_dir}': {e}", file=sys.stderr)
        return False

    try:
        with open(archive_path, 'rb') as f:
            magic = f.read(len(AR_MAGIC))
            if magic.startswith((FAT_MAGIC_BE, FAT_MAGIC_LE)):
                print(f"Error: '{archive_path}' is a macOS universal ('fat') binary.", file=sys.stderr)
                print("This utility is designed for standard 'ar' archives, not fat containers.", file=sys.stderr)
                return False

            if magic != AR_MAGIC:
                print(f"Error: '{archive_path}' is not a valid 'ar' archive file.", file=sys.stderr)
                return False

            long_names_table = None

            while True:
                current_pos = f.tell()
                header_bytes = f.read(MEMBER_HEADER_SIZE)

                if not header_bytes:
                    # End of file
                    break

                if len(header_bytes) < MEMBER_HEADER_SIZE:
                    print(f"Warning: Truncated header found at offset {current_pos}. Stopping.", file=sys.stderr)
                    break

                try:
                    header = ArchiveMemberHeader(header_bytes)
                except (ValueError, IndexError) as e:
                    print(f"Error parsing member header at offset {current_pos}: {e}", file=sys.stderr)
                    return False

                # Absolute position of the next header.  Member data is
                # padded to an even boundary.  Seeking here at the end of
                # every iteration keeps us aligned no matter how much of the
                # payload a branch below actually consumed.
                next_pos = current_pos + MEMBER_HEADER_SIZE + header.size + (header.size % 2)

                filename = ""
                data_size = header.size
                is_symbol_table = False
                is_long_names_table = False

                raw_name_str = header.raw_name.strip().decode('ascii', 'ignore')

                if raw_name_str.startswith('#1/'):
                    # BSD-style long filename: the name is stored in front
                    # of the data and counted in the member size.
                    try:
                        name_len = int(raw_name_str[3:])
                        if not 0 <= name_len <= header.size:
                            raise ValueError("name length out of range")
                        filename = f.read(name_len).decode('ascii').rstrip('\x00')
                    except ValueError:
                        print(f"Error parsing BSD-style long name at offset {current_pos}.", file=sys.stderr)
                        return False
                    data_size -= name_len
                elif raw_name_str == '//':
                    # GNU/SysV/COFF long names table
                    is_long_names_table = True
                elif raw_name_str in ('/', '/SYM64/'):
                    # GNU symbol table or COFF linker member.  These must be
                    # recognised before the generic '/<offset>' case below.
                    filename = raw_name_str
                    is_symbol_table = True
                elif raw_name_str.startswith('/'):
                    # Reference into the long names table
                    if long_names_table is None:
                        print(f"Error: Found GNU-style long name reference at offset {current_pos} "
                              "but no long name table ('//') has been seen yet.", file=sys.stderr)
                        return False
                    try:
                        offset = int(raw_name_str[1:])
                        if not 0 <= offset < len(long_names_table):
                            raise ValueError("offset outside long name table")
                        filename = _long_name_at(long_names_table, offset).decode('ascii')
                    except ValueError:
                        print(f"Error parsing GNU-style long name at offset {current_pos}.", file=sys.stderr)
                        return False
                else:
                    # Standard short filename (GNU terminates it with '/')
                    filename = raw_name_str.rstrip('/')

                if is_long_names_table:
                    long_names_table = f.read(data_size)
                elif is_symbol_table or filename.startswith('__.SYMDEF'):
                    # Symbol table: nothing to extract, payload is skipped
                    # by the seek at the end of the iteration.
                    print(f"  Skipping symbol table member: '{filename}'")
                else:
                    # Member names may carry directories or absolute paths
                    # (common in .lib files).  Keep only the last component
                    # so nothing lands outside output_dir.
                    member_name = os.path.basename(filename.replace('\\', '/'))
                    if not member_name or member_name in ('.', '..'):
                        print(f"Warning: Skipping member with empty name at offset {current_pos}.", file=sys.stderr)
                    else:
                        output_path = _make_unique_filename(os.path.join(output_dir, member_name))
                        data = f.read(data_size)
                        if len(data) < data_size:
                            print(f"Warning: Member '{filename}' at offset {current_pos} is truncated.",
                                  file=sys.stderr)
                        try:
                            with open(output_path, 'wb') as out_f:
                                out_f.write(data)
                        except IOError as e:
                            # Don't halt, try to extract other members
                            print(f"Error writing file '{output_path}': {e}", file=sys.stderr)

                # Advance to the next member, accounting for padding
                f.seek(next_pos)

    except FileNotFoundError:
        print(f"Error: Archive file not found at '{archive_path}'", file=sys.stderr)
        return False
    except IOError as e:
        print(f"Error reading archive file '{archive_path}': {e}", file=sys.stderr)
        return False

    print("Extraction complete.")
    return True
164+
165+
166+
# Command-line entry point: extract_ar.py <archive_file> [output_directory]
if __name__ == "__main__":
    if len(sys.argv) not in (2, 3):
        # Show only the program name; interpolating sys.argv itself would
        # print the whole argument list.
        print(f"Usage: {sys.argv[0]} <archive_file> [output_directory]")
        print("Extracts all members from a static archive (.a,.lib) to the specified directory.")
        sys.exit(1)

    archive_file = sys.argv[1]
    # Default to the current directory when no output directory is given.
    output_dir = sys.argv[2] if len(sys.argv) == 3 else "."

    # Propagate extraction failure through the process exit status.
    if not extract_archive(archive_file, output_dir):
        sys.exit(1)

Lib/__np__/windows.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,10 @@ def rename_symbols_in_file(target_lib, prefix, protected_symbols=None):
148148
protected_symbols = []
149149
import __np__.packaging
150150
__np__.packaging.install_build_tool("clang")
151-
__np__.packaging.install_build_tool("7zip")
152151
target_lib_abs = os.path.abspath(target_lib)
153152
with tempfile.TemporaryDirectory() as tmpdir:
154-
run_build_tool_exe("7zip", "7z.exe", "e", target_lib_abs, "-aou", "-o" + tmpdir, cwd=tmpdir)
153+
import __np__.tools.extract_ar
154+
__np__.tools.extract_ar.extract_archive(target_lib_abs, tmpdir)
155155
obj_list = []
156156
known_symbols = set()
157157
unmatched_symbols = set()
@@ -195,7 +195,6 @@ def rename_symbols_in_file(target_lib, prefix, protected_symbols=None):
195195
def rename_init_symbol_in_file(target_lib):
196196
import __np__.packaging
197197
__np__.packaging.install_build_tool("clang")
198-
__np__.packaging.install_build_tool("7zip")
199198
target_lib_abs = os.path.abspath(target_lib)
200199
with tempfile.TemporaryDirectory() as tmpdir:
201200
hasher = hashlib.md5()
@@ -204,7 +203,8 @@ def rename_init_symbol_in_file(target_lib):
204203
hasher.update(chunk)
205204
file_hash = hasher.hexdigest()
206205

207-
run_build_tool_exe("7zip", "7z.exe", "e", target_lib_abs, "-aou", "-o" + tmpdir, cwd=os.getcwd())
206+
import __np__.tools.extract_ar
207+
__np__.tools.extract_ar.extract_archive(target_lib_abs, tmpdir)
208208

209209
obj_paths_in_tmpdir = []
210210
modified_any_obj = False
@@ -266,9 +266,9 @@ def remove_symbols_in_file(target_lib, object_file, symbols):
266266
target_lib_abs = os.path.abspath(target_lib)
267267
import __np__.packaging
268268
__np__.packaging.install_build_tool("clang")
269-
__np__.packaging.install_build_tool("7zip")
270269
with tempfile.TemporaryDirectory() as tmpdir:
271-
run_build_tool_exe("7zip", "7z.exe", "e", target_lib_abs, "-aou", "-o" + tmpdir, cwd=tmpdir)
270+
import __np__.tools.extract_ar
271+
__np__.tools.extract_ar.extract_archive(target_lib_abs, tmpdir)
272272

273273
obj_list = [os.path.join(tmpdir, x) for x in os.listdir(tmpdir) if x.endswith(".obj")]
274274

0 commit comments

Comments
 (0)