Skip to content

Commit 435653b

Browse files
committed
Add layout logic to replace duplicate files with hard links
1 parent 0d31ba8 commit 435653b

File tree

6 files changed

+236
-8
lines changed

6 files changed

+236
-8
lines changed

src/Layout/redist/targets/GenerateArchives.targets

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@
1313
<!-- Ensure output directories are created -->
1414
<MakeDir Directories="$(ArtifactsShippingPackagesDir);$(ArtifactsNonShippingPackagesDir)" />
1515

16-
<!-- Create .tar.gz files on Linux/MacOS, and .zip files on Windows -->
16+
<!-- Create .zip files on Windows -->
1717
<ZipFileCreateFromDirectory
1818
Condition=" '$(OSName)' == 'win' "
1919
SourceDirectory="$(RedistInstallerLayoutPath)"
2020
DestinationArchive="$(ArtifactsShippingPackagesDir)$(ArtifactNameWithVersionCombinedHostHostFxrFrameworkSdk).zip"
2121
OverwriteDestination="true" />
2222

23+
<!-- Create .tar.gz files on all platforms -->
2324
<TarGzFileCreateFromDirectory
24-
Condition=" '$(OSName)' != 'win' "
2525
SourceDirectory="$(RedistInstallerLayoutPath)"
2626
DestinationArchive="$(ArtifactsShippingPackagesDir)$(ArtifactNameWithVersionCombinedHostHostFxrFrameworkSdk).tar.gz"
2727
OverwriteDestination="true"

src/Layout/redist/targets/GenerateInstallerLayout.targets

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@
5555
<SdkOutputFileDestination Include="@(SdkOutputFile->'$(RedistInstallerLayoutPath)sdk\$(Version)\%(RecursiveDir)%(Filename)%(Extension)')" />
5656
</ItemGroup>
5757

58-
<Copy SourceFiles="@(SdkOutputFile)" DestinationFiles="@(SdkOutputFileDestination)" SkipUnchangedFiles="true" />
58+
<Copy SourceFiles="@(SdkOutputFile)"
59+
DestinationFiles="@(SdkOutputFileDestination)"
60+
SkipUnchangedFiles="true"
61+
UseHardLinksIfPossible="true"/>
5962
</Target>
6063

6164
<Target Name="LayoutWorkloadUserLocalMarker"

src/Layout/redist/targets/GenerateLayout.targets

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,8 @@
538538

539539
<RemoveDir Directories="$(OutputPath)/15.0" />
540540
</Target>
541-
542-
<!-- Anything that goes into the sdk/$(Version) directory. -->
543-
<Target Name="GenerateSdkLayout"
541+
<!-- Core layout generation - anything that goes into the sdk/$(Version) directory. -->
542+
<Target Name="GenerateSdkLayoutCore"
544543
DependsOnTargets="PublishRuntimeIdentifierGraphFiles;
545544
GenerateSdkRuntimeIdentifierChain;
546545
PublishVersionFile;
@@ -564,7 +563,17 @@
564563
DeleteSymbolsFromPublishDir;
565564
RetargetTools;
566565
RemoveResourcesFromDotnetDeps;
567-
ChmodPublishDir"
566+
ChmodPublishDir" />
567+
568+
<!-- Deduplicate files after all layout work is complete -->
569+
<Target Name="DeduplicateLayoutFiles"
        DependsOnTargets="GenerateSdkLayoutCore">
  <!-- The DeduplicateFilesWithHardLinks task is only registered when MSBuild runs on .NET Core
       (its UsingTask is conditioned on MSBuildRuntimeType), so it is only invoked there.
       The condition is placed on the task instead of the target so that GenerateSdkLayoutCore
       still runs through DependsOnTargets on every MSBuild runtime. -->
  <DeduplicateFilesWithHardLinks Condition="'$(MSBuildRuntimeType)' == 'Core'"
                                 LayoutDirectory="$(OutputPath)" />
</Target>
573+
574+
<!-- Generates the complete SDK layout - everything in sdk/$(Version) -->
575+
<Target Name="GenerateSdkLayout"
576+
DependsOnTargets="DeduplicateLayoutFiles"
568577
AfterTargets="AfterBuild" />
569578

570579
</Project>
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#if !NETFRAMEWORK
5+
using System;
6+
using System.Collections.Generic;
7+
using System.IO;
8+
using System.Linq;
9+
using System.Runtime.InteropServices;
10+
using System.Security.Cryptography;
11+
12+
namespace Microsoft.DotNet.Build.Tasks
13+
{
14+
/// <summary>
/// Deduplicates files in a directory by replacing duplicates with hardlinks.
/// Files are grouped by size and content hash, and a deterministic "master" file is selected
/// (closest to root, then first by ordinal path comparison). All other duplicates are replaced with hardlinks.
/// </summary>
/// <remarks>
/// A duplicate is never removed before its replacement exists: the hardlink is created under a
/// temporary name next to the duplicate and then renamed over it. If linking fails, the original
/// file is left untouched and a warning is logged.
/// </remarks>
public sealed class DeduplicateFilesWithHardLinks : Task
{
    /// <summary>
    /// The root directory to scan for duplicate files.
    /// </summary>
    [Required]
    public string LayoutDirectory { get; set; } = null!;

    /// <summary>
    /// Minimum file size in bytes to consider for deduplication (default: 1024).
    /// Small files have minimal impact on archive size.
    /// </summary>
    public int MinimumFileSize { get; set; } = 1024;

    /// <summary>
    /// Number of files that were replaced with a hardlink to their master file.
    /// </summary>
    [Output]
    public int FilesDeduplicatedCount { get; set; }

    /// <summary>
    /// Sum of the sizes, in bytes, of all files that were replaced with hardlinks.
    /// </summary>
    [Output]
    public long BytesSaved { get; set; }

    /// <summary>
    /// Scans <see cref="LayoutDirectory"/>, groups identical files and replaces every duplicate
    /// with a hardlink to the group's master file.
    /// </summary>
    /// <returns>
    /// <c>false</c> when <see cref="LayoutDirectory"/> does not exist; otherwise <c>true</c>.
    /// Failures to hash or link individual files are logged as warnings and do not fail the task.
    /// </returns>
    public override bool Execute()
    {
        if (!Directory.Exists(LayoutDirectory))
        {
            Log.LogError($"LayoutDirectory '{LayoutDirectory}' does not exist.");
            return false;
        }

        Log.LogMessage(MessageImportance.High, $"Scanning for duplicate files in '{LayoutDirectory}'...");

        // Find all eligible files, reading each file's size exactly once.
        List<(string FilePath, long Length)> candidates = Directory
            .GetFiles(LayoutDirectory, "*", SearchOption.AllDirectories)
            .Select(path => (FilePath: path, Length: new FileInfo(path).Length))
            .Where(candidate => candidate.Length >= MinimumFileSize)
            .ToList();

        Log.LogMessage(MessageImportance.Normal, $"Found {candidates.Count} files eligible for deduplication (>= {MinimumFileSize} bytes).");

        if (candidates.Count == 0)
        {
            return true;
        }

        var filesByHash = GroupByContentHash(candidates);

        // Process groups with duplicates
        var duplicateGroups = filesByHash.Values.Where(g => g.Count > 1).ToList();

        Log.LogMessage(MessageImportance.Normal, $"Found {duplicateGroups.Count} groups of duplicate files.");

        int totalFilesDeduped = 0;
        long totalBytesSaved = 0;

        foreach (var group in duplicateGroups)
        {
            // Sort deterministically: by depth (ascending), then by ordinal path comparison.
            // An ordinal comparer keeps the choice of master independent of the machine's culture.
            var sorted = group
                .OrderBy(f => f.Depth)
                .ThenBy(f => f.Path, StringComparer.Ordinal)
                .ToList();

            // First file is the "master"
            var master = sorted[0];

            Log.LogMessage(MessageImportance.Low, $"Master file: {master.Path}");

            foreach (var duplicate in sorted.Skip(1))
            {
                try
                {
                    CreateHardLink(duplicate.Path, master.Path);

                    totalFilesDeduped++;
                    totalBytesSaved += duplicate.Size;
                    Log.LogMessage(MessageImportance.Low, $"  Linked: {duplicate.Path}");
                }
                catch (Exception ex)
                {
                    // Best effort: the duplicate is still present as a regular file at this point.
                    Log.LogWarning($"Failed to create hardlink from '{duplicate.Path}' to '{master.Path}': {ex.Message}");
                }
            }
        }

        FilesDeduplicatedCount = totalFilesDeduped;
        BytesSaved = totalBytesSaved;

        Log.LogMessage(MessageImportance.High,
            $"Deduplication complete: {totalFilesDeduped} files replaced with hardlinks, saving {totalBytesSaved / (1024.0 * 1024.0):F2} MB.");

        return true;
    }

    /// <summary>
    /// Hashes the candidate files and groups them by content hash.
    /// </summary>
    /// <param name="candidates">Path and size of every file eligible for deduplication.</param>
    /// <returns>A map from content hash to the files that have that hash.</returns>
    private Dictionary<string, List<FileEntry>> GroupByContentHash(IEnumerable<(string FilePath, long Length)> candidates)
    {
        var filesByHash = new Dictionary<string, List<FileEntry>>(StringComparer.Ordinal);

        // Files of different sizes can never have identical content, so only sizes shared by
        // at least two files need to be hashed.
        var sameSizeGroups = candidates
            .GroupBy(candidate => candidate.Length)
            .Where(sizeGroup => sizeGroup.Count() > 1);

        foreach (var sizeGroup in sameSizeGroups)
        {
            foreach (var (filePath, length) in sizeGroup)
            {
                try
                {
                    var hash = ComputeFileHash(filePath);

                    if (!filesByHash.TryGetValue(hash, out var entries))
                    {
                        entries = new List<FileEntry>();
                        filesByHash[hash] = entries;
                    }

                    entries.Add(new FileEntry
                    {
                        Path = filePath,
                        Hash = hash,
                        Size = length,
                        Depth = GetPathDepth(filePath, LayoutDirectory)
                    });
                }
                catch (Exception ex)
                {
                    Log.LogWarning($"Failed to hash file '{filePath}': {ex.Message}");
                }
            }
        }

        return filesByHash;
    }

    /// <summary>
    /// Computes the SHA-256 hash of a file's content as a lowercase hexadecimal string.
    /// </summary>
    private static string ComputeFileHash(string filePath)
    {
        using var sha256 = SHA256.Create();
        using var stream = File.OpenRead(filePath);
        var hashBytes = sha256.ComputeHash(stream);
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }

    /// <summary>
    /// Returns the number of directory levels between <paramref name="rootDirectory"/> and the file
    /// (0 for a file directly inside the root).
    /// </summary>
    private static int GetPathDepth(string filePath, string rootDirectory)
    {
        var relativePath = Path.GetRelativePath(rootDirectory, filePath);
        return relativePath.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar).Length - 1;
    }

    /// <summary>
    /// Replaces <paramref name="duplicateFilePath"/> with a hardlink to <paramref name="masterFilePath"/>.
    /// </summary>
    /// <remarks>
    /// The link is first created under a temporary name in the duplicate's directory and then renamed
    /// over the duplicate, so the duplicate is only replaced once the link exists.
    /// </remarks>
    /// <exception cref="InvalidOperationException">The operating system refused to create the hardlink.</exception>
    private void CreateHardLink(string duplicateFilePath, string masterFilePath)
    {
        // TODO: Replace P/Invoke with File.CreateHardLink() when SDK targets .NET 11+
        // See: https://github.com/dotnet/runtime/issues/69030

        // The temporary link lives next to the duplicate so the final rename stays within one directory.
        string directory = Path.GetDirectoryName(duplicateFilePath) ?? string.Empty;
        string tempLinkPath = Path.Combine(directory, Path.GetRandomFileName());

        try
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                CreateHardLinkWindows(tempLinkPath, masterFilePath);
            }
            else
            {
                CreateHardLinkUnix(tempLinkPath, masterFilePath);
            }

            // Swap the link into place; the duplicate is only replaced at this point.
            File.Move(tempLinkPath, duplicateFilePath, overwrite: true);
        }
        finally
        {
            // The temporary name is still present when linking succeeded but the move failed, and
            // also when the duplicate already was a hardlink to the master: POSIX rename() is a
            // no-op for two names of the same file.
            DeleteTemporaryLink(tempLinkPath);
        }
    }

    /// <summary>
    /// Removes a leftover temporary link without throwing, so an earlier failure is not masked.
    /// </summary>
    private void DeleteTemporaryLink(string tempLinkPath)
    {
        try
        {
            // File.Delete does not throw when the file does not exist.
            File.Delete(tempLinkPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            Log.LogWarning($"Failed to delete temporary hardlink '{tempLinkPath}': {ex.Message}");
        }
    }

    /// <summary>
    /// Creates a hardlink through the Win32 <c>CreateHardLinkW</c> API.
    /// </summary>
    private static void CreateHardLinkWindows(string linkPath, string targetPath)
    {
        if (!CreateHardLinkWin32(linkPath, targetPath, IntPtr.Zero))
        {
            int errorCode = Marshal.GetLastWin32Error();
            throw new InvalidOperationException($"CreateHardLink failed with error code {errorCode}");
        }
    }

    /// <summary>
    /// Creates a hardlink through libc <c>link()</c>.
    /// </summary>
    private static void CreateHardLinkUnix(string linkPath, string targetPath)
    {
        if (link(targetPath, linkPath) != 0)
        {
            // With SetLastError = true the runtime captures errno for this call.
            int errorCode = Marshal.GetLastWin32Error();
            throw new InvalidOperationException($"link() failed with error code {errorCode}");
        }
    }

    // P/Invoke declarations
    [DllImport("kernel32.dll", EntryPoint = "CreateHardLinkW", CharSet = CharSet.Unicode, SetLastError = true)]
    private static extern bool CreateHardLinkWin32(
        string lpFileName,
        string lpExistingFileName,
        IntPtr lpSecurityAttributes);

    [DllImport("libc", SetLastError = true)]
    private static extern int link(string oldpath, string newpath);

    /// <summary>
    /// A scanned file together with the data used to group duplicates and pick the master.
    /// </summary>
    private sealed class FileEntry
    {
        public required string Path { get; set; }
        public required string Hash { get; set; }
        public long Size { get; set; }
        public int Depth { get; set; }
    }
}
210+
}
211+
#endif

src/Tasks/sdk-tasks/ReplaceFilesWithSymbolicLinks.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
namespace Microsoft.DotNet.Build.Tasks
1818
{
1919
/// <summary>
20-
/// Replaces files that have the same content with hard links.
20+
/// Replaces files that have the same content with symbolic links.
2121
/// </summary>
2222
public sealed class ReplaceFilesWithSymbolicLinks : Task
2323
{

src/Tasks/sdk-tasks/sdk-tasks.InTree.targets

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@
3535
<UsingTask TaskName="UpdateRuntimeConfig" AssemblyFile="$(SdkTasksAssembly)" TaskFactory="TaskHostFactory" />
3636
<UsingTask TaskName="ZipFileCreateFromDirectory" AssemblyFile="$(SdkTasksAssembly)" TaskFactory="TaskHostFactory" />
3737

38+
<UsingTask TaskName="DeduplicateFilesWithHardLinks"
39+
Condition="'$(MSBuildRuntimeType)' == 'Core'"
40+
AssemblyFile="$(SdkTasksAssembly)"
41+
TaskFactory="TaskHostFactory" />
42+
3843
<!-- Tasks from the Arcade SDK -->
3944
<UsingTask TaskName="DownloadFile" AssemblyFile="$(ArcadeSdkBuildTasksAssembly)" />
4045

0 commit comments

Comments
 (0)