From 54edb74166b12543ad819e25c35ed4920b78b8c2 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sun, 2 Feb 2025 19:44:17 +0000 Subject: [PATCH 01/26] refactor codebase --- RobloxUltimateScraper/CommandLineConfig.cs | 198 +++++++++++ RobloxUltimateScraper/Config.cs | 319 ++---------------- .../Enums/CompressionType.cs | 28 ++ RobloxUltimateScraper/Enums/IndexType.cs | 23 ++ RobloxUltimateScraper/Enums/OutputType.cs | 40 +++ RobloxUltimateScraper/Enums/ScraperType.cs | 33 ++ RobloxUltimateScraper/FileWriter.cs | 7 +- RobloxUltimateScraper/Http.cs | 47 +++ RobloxUltimateScraper/Models/AssetInput.cs | 14 - RobloxUltimateScraper/Models/AssetOutput.cs | 20 +- RobloxUltimateScraper/Program.cs | 68 ++-- RobloxUltimateScraper/Scraper.cs | 213 +++++------- 12 files changed, 501 insertions(+), 509 deletions(-) create mode 100644 RobloxUltimateScraper/CommandLineConfig.cs create mode 100644 RobloxUltimateScraper/Enums/CompressionType.cs create mode 100644 RobloxUltimateScraper/Enums/IndexType.cs create mode 100644 RobloxUltimateScraper/Enums/OutputType.cs create mode 100644 RobloxUltimateScraper/Enums/ScraperType.cs create mode 100644 RobloxUltimateScraper/Http.cs delete mode 100644 RobloxUltimateScraper/Models/AssetInput.cs diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs new file mode 100644 index 0000000..e53ba6a --- /dev/null +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -0,0 +1,198 @@ +using CommandLine; +using RobloxUltimateScraper.Enums; + +namespace RobloxUltimateScraper +{ + /// + /// Scraper configuration + /// + internal class CommandLineConfig + { + /// + /// Selected scraper type. + /// + public ScraperType Scraper { get; set; } = ScraperType.None; + + /// + /// Asset to scrape. + /// Should be used with scraper types . + /// + public ulong ScraperAssetId { get; set; } = 0; + + /// + /// Asset list to scrape. + /// Should be used with scraper types and . + /// + public string ScraperListPath { get; set; } = string.Empty; + + /// + /// Asset scrape start range. + /// Should be used with scraper types . + /// + public ulong ScraperStartRange { get; set; } = 0; + + /// + /// Asset scrape end range. + /// Should be used with scraper types . + /// + public ulong ScraperEndRange { get; set; } = 0; + + /// + /// Use the asset scraper. + /// COMMAND LINE USE ONLY! + /// + [Option('a', "asset", Required = false, HelpText = "Use the asset scraper. Parameter takes in an ID.")] + public ulong UseAssetScraper + { + set + { + Scraper = ScraperType.Asset; + ScraperAssetId = value; + } + } + + /// + /// Use the asset list scraper. + /// COMMAND LINE USE ONLY! + /// + [Option('l', "list", Required = false, HelpText = "Use the asset list scraper. Parameter takes in a list path. WIP!")] + public string UseListScraper + { + set + { + Scraper = ScraperType.List; + ScraperListPath = value; + } + } + + /// + /// Use the asset list versions scraper. + /// COMMAND LINE USE ONLY! + /// + [Option("listversions", Required = false, HelpText = "Use the asset list version scraper. Parameter takes in a list path. WIP!")] + public string UseListVersionsScraper + { + set + { + Scraper = ScraperType.ListVersions; + ScraperListPath = value; + } + } + + /// + /// Use the asset range scraper. + /// COMMAND LINE USE ONLY! + /// + [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID]. WIP!")] + public string UseRangeScraper + { + set + { + Scraper = ScraperType.Range; + + // parse input + string[] segments = value.Split('-'); + + if (segments.Length != 2) + throw new ArgumentException("Parameter is not in valid format."); + + if (!ulong.TryParse(segments[0], out ulong startRange)) + throw new ArgumentException("Start range is not an integer."); + + if (!ulong.TryParse(segments[1], out ulong endRange)) + throw new ArgumentException("End range is not an integer."); + + ScraperStartRange = startRange; + ScraperEndRange = endRange; + } + } + + /// + /// Assets output type. + /// + [Option('o', "output", Required = false, Default = OutputType.Both, HelpText = "Assets output type. (Files, Index, Console, Both)")] + public OutputType OutputType { get; set; } = OutputType.Both; + + /// + /// Index type. + /// + [Option('i', "index", Required = false, Default = IndexType.All, HelpText = "Index type. (Text, Json, All)")] + public IndexType IndexType { get; set; } = IndexType.All; + + /// + /// Asset compression type. + /// + [Option('c', "compression", Required = false, Default = CompressionType.None, HelpText = "Compression type. (None, GZip, Bzip2, Zstd)")] + public CompressionType CompressionType { get; set; } = CompressionType.None; + + [Option("compressionlevel", Required = false, Default = 9, HelpText = "Compression level for the compression. Only works for BZip2 (1-9) and Zstd (1-22). Other name: --cl.")] + public int CompressionLevelArg { get; set; } = 9; // 9 is good for both BZip2 and Zstd + + // this sucks but commandlineparser has no way to set multiple names for an argument + // and short arguments are only allowed to be a single character + [Option("cl", Required = false, Hidden = true)] + public int? CompressionLevelArgOtherName { get; set; } + + /// + /// Assets output directory. + /// + [Option('d', "directory", Required = false, HelpText = "Assets output directory.")] + public string OutputDirectory { get; set; } = ""; + + /// + /// Assets output extension. + /// + [Option('e', "extension", Required = false, Default = "Auto", HelpText = "Assets output extension. A value of 'Auto' will determine the extension based on the asset type.")] + public string OutputExtension { get; set; } = "Auto"; + + /// + /// Number of scrape workers. + /// + [Option('w', "workers", Required = false, Default = 1, HelpText = "Number of scrape workers.")] + public int Workers { get; set; } = 1; + + /// + /// Roblox authentication cookie (ROBLOSECURITY). + /// For copylocked game scraping. + /// + [Option("cookies", Required = false, HelpText = "Roblox authentication cookie (.ROBLOSECURITY). This argument is prioritised over the environment variable 'ROBLOXULTIMATESCRAPER_COOKIE'.")] + public string? AuthCookie { get; set; } + + /// + /// Http timeout in seconds. + /// + [Option('t', "timeout", Required = false, Default = 180, HelpText = "Http timeout in seconds.")] + public int HttpTimeout { get; set; } = 180; + + private string _baseUrl = "roblox.com"; + + /// + /// Roblox environment to download from. + /// + [Option("baseurl", Required = false, Default = "www.roblox.com", HelpText = "Roblox environment to download from.")] + public string BaseUrl + { + get => _baseUrl; + + set + { + if (value.StartsWith("http://")) + value = value[7..]; + else if (value.StartsWith("https://")) + value = value[8..]; + + if (value.StartsWith("www.") || value.StartsWith("web.")) + value = value[4..]; + + int idx = value.IndexOf('/'); + if (idx != -1) + value = value[..idx]; + + _baseUrl = value; + } + } + + [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")] + public bool? TrimCdnUrlInConsole { get; set; } + } +} diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index 08bab82..6ed8bf9 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -1,317 +1,54 @@ -using CommandLine; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using RobloxUltimateScraper.Enums; namespace RobloxUltimateScraper { - /// - /// Output type - /// - internal enum OutputType - { - /// - /// Asset files - /// - Files = 0, - - [Obsolete] - FilesOnly = 0, - - /// - /// Asset index - /// - Index = 1, - - [Obsolete] - IndexOnly = 1, - - /// - /// Console output - /// - Console = 2, - - /// - /// Asset files and index - /// - Both = 3 - } - - /// - /// Compression type on asset files - /// - internal enum CompressionType - { - /// - /// No compression - /// - None, - - /// - /// GZip compression - /// - GZip, - - /// - /// BZip2 compression - /// - BZip2, - - /// - /// Zstd compression - /// - Zstd - } - - /// - /// Index type - /// - internal enum IndexType - { - /// - /// Text index - /// - Text, - - /// - /// Json index - /// - Json, - - /// - /// Text and json indexes - /// - All - } - - /// - /// Scraper type - /// - internal enum ScraperType - { - /// - /// Asset version scraper - /// - Asset, - - /// - /// Asset list scraper - /// - List, - - /// - /// Asset list scraper, with versions - /// - ListVersions, - - /// - /// Asset range scraper - /// - Range - } - - /// - /// Scraper configuration - /// internal class Config { - /// - /// singleton. - /// - public static Config Default { get; set; } = default!; + public static Config Default { get; private set; } = null!; - /// - /// Selected scraper type. - /// - public ScraperType? Scraper { get; set; } + private CommandLineConfig _clConfig; - /// - /// Asset to scrape. - /// Should be used with scraper types . - /// - public long ScraperId { get; set; } = 0; + public ulong ScraperAssetId => _clConfig.ScraperAssetId; - /// - /// Asset list to scrape. - /// Should be used with scraper types and . - /// - public string ScraperListPath { get; set; } = string.Empty; + public string ScraperListPath => _clConfig.ScraperListPath; - /// - /// Asset scrape start range. - /// Should be used with scraper types . - /// - public long ScraperStartRange { get; set; } = 0; + public ulong ScraperStartRange => _clConfig.ScraperStartRange; + public ulong ScraperEndRange => _clConfig.ScraperEndRange; - /// - /// Asset scrape end range. - /// Should be used with scraper types . - /// - public long ScraperEndRange { get; set; } = 0; + public ScraperType Scraper => _clConfig.Scraper; + public OutputType OutputType => _clConfig.OutputType; + public IndexType IndexType => _clConfig.IndexType; - /// - /// Use the asset scraper. - /// COMMAND LINE USE ONLY! - /// - [Option('a', "asset", Required = false, HelpText = "Use the asset scraper. Parameter takes in an ID.")] - public long UseAssetScraper - { - set - { - Scraper = ScraperType.Asset; - ScraperId = value; - } - } + public CompressionType CompressionType => _clConfig.CompressionType; + public int CompressionLevel { get; } - /// - /// Use the asset list scraper. - /// COMMAND LINE USE ONLY! - /// - [Option('l', "list", Required = false, HelpText = "Use the asset list scraper. Parameter takes in a list path. WIP!")] - public string UseListScraper - { - set - { - Scraper = ScraperType.List; - ScraperListPath = value; - } - } + public string OutputDirectory => _clConfig.OutputDirectory; + public string OutputExtension => _clConfig.OutputExtension; - /// - /// Use the asset list versions scraper. - /// COMMAND LINE USE ONLY! - /// - [Option("listversions", Required = false, HelpText = "Use the asset list version scraper. Parameter takes in a list path. WIP!")] - public string UseListVersionsScraper - { - set - { - Scraper = ScraperType.ListVersions; - ScraperListPath = value; - } - } + public int Workers => _clConfig.Workers; - /// - /// Use the asset range scraper. - /// COMMAND LINE USE ONLY! - /// - [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID]. WIP!")] - public string UseRangeScraper - { - set - { - Scraper = ScraperType.Range; + public string? AuthCookie => _clConfig.AuthCookie; - // parse input - string[] segments = value.Split('-'); + public int HttpTimeout => _clConfig.HttpTimeout; - if (segments.Length != 2) - throw new ArgumentException("Parameter is not in valid format."); + public string BaseUrl => _clConfig.BaseUrl; - if (!long.TryParse(segments[0], out long startRange)) - throw new ArgumentException("Start range is not an integer."); + public bool TrimCdnUrlInConsole { get; } - if (!long.TryParse(segments[1], out long endRange)) - throw new ArgumentException("End range is not an integer."); + public Config(CommandLineConfig config) + { + _clConfig = config; - ScraperStartRange = startRange; - ScraperEndRange = endRange; - } + CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg; + TrimCdnUrlInConsole = _clConfig.TrimCdnUrlInConsole ?? OutputType != OutputType.Console; } - /// - /// Assets output type. - /// - [Option('o', "output", Required = false, Default = OutputType.Both, HelpText = "Assets output type. (Files, Index, Console, Both)")] - public OutputType OutputType { get; set; } = OutputType.Both; - - /// - /// Index type. - /// - [Option('i', "index", Required = false, Default = IndexType.All, HelpText = "Index type. (Text, Json, All)")] - public IndexType IndexType { get; set; } = IndexType.All; - - /// - /// Asset compression type. - /// - [Option('c', "compression", Required = false, Default = CompressionType.None, HelpText = "Compression type. (None, GZip, Bzip2, Zstd)")] - public CompressionType CompressionType { get; set; } = CompressionType.None; - - [Option("compressionlevel", Required = false, Default = 9, HelpText = "Compression level for the compression. Only works for BZip2 (1-9) and Zstd (1-22). Other name: --cl.")] - public int CompressionLevelArg { get; set; } = 9; // 9 is good for both BZip2 and Zstd - - // this sucks but commandlineparser has no way to set multiple names for an argument - // and short arguments are only allowed to be a single character - [Option("cl", Required = false, Hidden = true)] - public int? CompressionLevelArgOtherName { get; set; } - - /// - /// Asset compression level. - /// - public int CompressionLevel { get => CompressionLevelArgOtherName != null ? (int)CompressionLevelArgOtherName : CompressionLevelArg; } - - /// - /// Assets output directory. - /// - [Option('d', "directory", Required = false, HelpText = "Assets output directory.")] - public string OutputDirectory { get; set; } = ""; - - /// - /// Assets output extension. - /// - [Option('e', "extension", Required = false, Default = "Auto", HelpText = "Assets output extension. A value of 'Auto' will determine the extension based on the asset type.")] - public string OutputExtension { get; set; } = "Auto"; - - /// - /// Number of scrape workers. - /// - [Option('w', "workers", Required = false, Default = 1, HelpText = "Number of scrape workers.")] - public int Workers { get; set; } = 1; - - /// - /// Roblox authentication cookie (ROBLOSECURITY). - /// For copylocked game scraping. - /// - [Option("cookies", Required = false, HelpText = "Roblox authentication cookie (.ROBLOSECURITY). This argument is prioritised over the environment variable 'ROBLOXULTIMATESCRAPER_COOKIE'.")] - public string? AuthCookie { get; set; } - - /// - /// Http timeout in seconds. - /// - [Option('t', "timeout", Required = false, Default = 180, HelpText = "Http timeout in seconds.")] - public int HttpTimeout { get; set; } = 180; - - private string _baseUrl = "roblox.com"; - - /// - /// Roblox environment to download from. - /// - [Option("baseurl", Required = false, Default = "www.roblox.com", HelpText = "Roblox environment to download from.")] - public string BaseUrl + public static void Initialise(CommandLineConfig commandLineConfig) { - get => _baseUrl; - - set - { - if (value.StartsWith("http://")) - value = value[7..]; - else if (value.StartsWith("https://")) - value = value[8..]; + if (Default != null) + throw new Exception("Can not initialise Config twice."); - if (value.StartsWith("www.") || value.StartsWith("web.")) - value = value[4..]; - - int idx = value.IndexOf('/'); - if (idx != -1) - value = value[..idx]; - - _baseUrl = value; - } + Default = new Config(commandLineConfig); } - - [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")] - public bool? TrimCdnUrlInConsole { get; set; } } } diff --git a/RobloxUltimateScraper/Enums/CompressionType.cs b/RobloxUltimateScraper/Enums/CompressionType.cs new file mode 100644 index 0000000..8d4e9ef --- /dev/null +++ b/RobloxUltimateScraper/Enums/CompressionType.cs @@ -0,0 +1,28 @@ +namespace RobloxUltimateScraper.Enums +{ + /// + /// Compression type on asset files + /// + internal enum CompressionType + { + /// + /// No compression + /// + None, + + /// + /// GZip compression + /// + GZip, + + /// + /// BZip2 compression + /// + BZip2, + + /// + /// Zstd compression + /// + Zstd + } +} diff --git a/RobloxUltimateScraper/Enums/IndexType.cs b/RobloxUltimateScraper/Enums/IndexType.cs new file mode 100644 index 0000000..7641eb5 --- /dev/null +++ b/RobloxUltimateScraper/Enums/IndexType.cs @@ -0,0 +1,23 @@ +namespace RobloxUltimateScraper.Enums +{ + /// + /// Index type + /// + internal enum IndexType + { + /// + /// Text index + /// + Text, + + /// + /// Json index + /// + Json, + + /// + /// Text and json indexes + /// + All + } +} diff --git a/RobloxUltimateScraper/Enums/OutputType.cs b/RobloxUltimateScraper/Enums/OutputType.cs new file mode 100644 index 0000000..b3167f1 --- /dev/null +++ b/RobloxUltimateScraper/Enums/OutputType.cs @@ -0,0 +1,40 @@ +namespace RobloxUltimateScraper.Enums +{ + /// + /// Asset output type + /// + internal enum OutputType + { + /// + /// Asset files + /// + Files = 0, + + [Obsolete] + FilesOnly = 0, + + /// + /// Asset index + /// + Index = 1, + + [Obsolete] + IndexOnly = 1, + + /// + /// Console output + /// + Console = 2, + + /// + /// Asset files and index + /// + Both = 3 + } + + internal static class OutputTypeEx + { + public static bool IsFileSavingEnabled(this OutputType type) => type == OutputType.Files || type == OutputType.Both; + public static bool IsIndexEnabled(this OutputType type) => type == OutputType.Index || type == OutputType.Both; + } +} diff --git a/RobloxUltimateScraper/Enums/ScraperType.cs b/RobloxUltimateScraper/Enums/ScraperType.cs new file mode 100644 index 0000000..586a2ba --- /dev/null +++ b/RobloxUltimateScraper/Enums/ScraperType.cs @@ -0,0 +1,33 @@ +namespace RobloxUltimateScraper.Enums +{ + /// + /// Scraper type + /// + internal enum ScraperType + { + /// + /// No scraper selected + /// + None, + + /// + /// Asset version scraper + /// + Asset, + + /// + /// Asset list scraper + /// + List, + + /// + /// Asset list scraper, with versions + /// + ListVersions, + + /// + /// Asset range scraper + /// + Range + } +} diff --git a/RobloxUltimateScraper/FileWriter.cs b/RobloxUltimateScraper/FileWriter.cs index 4a2882d..5ebcb09 100644 --- a/RobloxUltimateScraper/FileWriter.cs +++ b/RobloxUltimateScraper/FileWriter.cs @@ -1,9 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection.Emit; -using System.Text; -using System.Threading.Tasks; +using RobloxUltimateScraper.Enums; namespace RobloxUltimateScraper { diff --git a/RobloxUltimateScraper/Http.cs b/RobloxUltimateScraper/Http.cs new file mode 100644 index 0000000..a81be3d --- /dev/null +++ b/RobloxUltimateScraper/Http.cs @@ -0,0 +1,47 @@ +using System.Net; + +namespace RobloxUltimateScraper +{ + internal static class Http + { + public static HttpClient Client { get; } + + static Http() + { + CookieContainer cookieContainer = new CookieContainer(); + string? cookie = null; + + if (!string.IsNullOrEmpty(Config.Default.AuthCookie)) + { + Console.WriteLine("Using cookies from arguments."); + cookie = Config.Default.AuthCookie; + } + else + { + string? envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE"); + if (!string.IsNullOrEmpty(envValue)) + { + Console.WriteLine("Using cookies from environment variables."); + cookie = envValue; + } + } + + if (cookie != null) + cookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}")); + + HttpClientHandler httpClientHandler = new HttpClientHandler + { + AutomaticDecompression = DecompressionMethods.All, + AllowAutoRedirect = false, // we are using v1 because v2 is bad + CookieContainer = cookieContainer, + UseCookies = true + }; + + Client = new HttpClient(httpClientHandler) + { + Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout) + }; + //_HttpClient.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinINet"); + } + } +} diff --git a/RobloxUltimateScraper/Models/AssetInput.cs b/RobloxUltimateScraper/Models/AssetInput.cs deleted file mode 100644 index 9afd2fe..0000000 --- a/RobloxUltimateScraper/Models/AssetInput.cs +++ /dev/null @@ -1,14 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace RobloxUltimateScraper.Models -{ - internal class AssetInput - { - public long Id { get; set; } - public int Version { get; set; } - } -} diff --git a/RobloxUltimateScraper/Models/AssetOutput.cs b/RobloxUltimateScraper/Models/AssetOutput.cs index 1ad4ce4..c555d4f 100644 --- a/RobloxUltimateScraper/Models/AssetOutput.cs +++ b/RobloxUltimateScraper/Models/AssetOutput.cs @@ -1,22 +1,10 @@ -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace RobloxUltimateScraper.Models +namespace RobloxUltimateScraper.Models { /// /// Asset information for index /// internal class AssetOutput : IComparable { - /// - /// Asset version - /// - public long Id { get; set; } - /// /// Asset version /// @@ -67,7 +55,7 @@ internal class AssetOutput : IComparable // 1818 | v1 | Error: failed to download public string ToString(bool trimCdnUrl) { - string output = $"{Id} | v{Version}"; + string output = $"v{Version}"; if (Error != null) { @@ -96,10 +84,6 @@ public int CompareTo(AssetOutput? other) { if (other == null) return 1; - // compare asset ids - if (Id > other.Id) return 1; - if (Id < other.Id) return -1; - // compare versions if (Version > other.Version) return 1; if (Version < other.Version) return -1; diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index c9e9d12..b033ba2 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -1,7 +1,6 @@ using CommandLine; using CommandLine.Text; using RobloxUltimateScraper.Enums; -using RobloxUltimateScraper.Models; using System.Reflection; namespace RobloxUltimateScraper @@ -14,9 +13,9 @@ static async Task Main(string[] args) args = new string[] { "--help" }; Parser cmdLineParser = new Parser(settings => settings.CaseInsensitiveEnumValues = true); - ParserResult configParser = cmdLineParser.ParseArguments(args); + ParserResult configParser = cmdLineParser.ParseArguments(args); configParser.WithNotParsed(errors => Error(configParser, errors)); - await configParser.WithParsedAsync(async config => await Run(config)); + configParser.WithParsed(config => Run(config)); } /// @@ -24,11 +23,11 @@ static async Task Main(string[] args) /// /// Configuration /// - static async Task Run(Config config) + static void Run(CommandLineConfig config) { Console.WriteLine($"RobloxUltimateScraper v{Assembly.GetExecutingAssembly().GetName().Version}"); - Config.Default = config; + Config.Initialise(config); // TODO: add functionality for // list @@ -36,13 +35,13 @@ static async Task Run(Config config) // range switch (Config.Default.Scraper) { - case null: - Console.WriteLine("Please define which scraper you wish to use!"); + case ScraperType.None: + Console.WriteLine("No scraper chosen."); Console.WriteLine("Run the scraper with the --help argument for all commands."); break; case ScraperType.Asset: - await RunAssetScraper(); + RunAssetScraper(); break; case ScraperType.List: @@ -67,7 +66,7 @@ static async Task Run(Config config) /// Handles command line parsing failure /// /// Errors from command line parser - static void Error(ParserResult config, IEnumerable errors) + static void Error(ParserResult config, IEnumerable errors) { HelpText text = HelpText.AutoBuild(config); Console.WriteLine(text); @@ -82,63 +81,46 @@ static void Error(ParserResult config, IEnumerable errors) /// Downloads /// Errors /// Versions - static void SetAssetScraperTitle(long id, int downloaded, int errors, int total) + static void SetAssetScraperTitle(ulong id, int downloaded, int errors, int total) { - Console.Title = $"{nameof(RobloxUltimateScraper)} | Asset {id} | {downloaded}/{total} | {errors} Errors"; + Console.Title = $"RobloxUltimateScraper | Asset {id} | {downloaded}/{total} | {errors} Errors"; } /// /// Starts the asset scraper /// /// - static async Task RunAssetScraper() + static void RunAssetScraper() { - long assetId = Config.Default.ScraperId; + ulong assetId = Config.Default.ScraperAssetId; - if (string.IsNullOrEmpty(Config.Default.OutputDirectory) && !Scraper.ConsoleOnly) - Config.Default.OutputDirectory = $"Asset_{assetId}"; + string outputDirectory; - Scraper.ShouldTrimCdnUrlInConsole = Config.Default.TrimCdnUrlInConsole ?? !Scraper.ConsoleOnly; + if (string.IsNullOrEmpty(Config.Default.OutputDirectory) && Config.Default.OutputType != OutputType.Console) + outputDirectory = $"Asset_{assetId}"; + else + outputDirectory = Config.Default.OutputDirectory; - // get all place versions - var assetDeliveryInfo = await Scraper.GetAssetDeliveryInformation(assetId); + Scraper scraper = new Scraper(assetId, outputDirectory); + scraper.Setup().Wait(); - if (!assetDeliveryInfo.Success) - { - Console.WriteLine($"Failed to fetch versions for asset {assetId}: {assetDeliveryInfo.Error}"); - Environment.Exit(1); - } - - Console.WriteLine($"Asset {assetId} has {assetDeliveryInfo.TotalVersions} versions!"); - - Scraper.FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension; - Scraper.CompressionLevel = Config.Default.CompressionLevel; // BZip2 and Zstd libraries automatically clamp the compression level - - // add to queue - for (int i = 1; i <= assetDeliveryInfo.TotalVersions; i++) - { - Scraper.Assets.Enqueue(new AssetInput - { - Id = assetId, - Version = i - }); - } + Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!"); // set up titles - SetAssetScraperTitle(assetId, 0, 0, assetDeliveryInfo.TotalVersions); - Scraper.OnDownloadFinished += () => SetAssetScraperTitle(assetId, Scraper.SuccessfulDownloads, Scraper.FailedDownloads, assetDeliveryInfo.TotalVersions); + SetAssetScraperTitle(assetId, 0, 0, scraper.TotalVersions); + scraper.OnDownloadFinished += () => SetAssetScraperTitle(assetId, scraper.SuccessfulDownloads, scraper.FailedDownloads, scraper.TotalVersions); // start workers List workers = new List(); for (int i = 1; i <= Config.Default.Workers; i++) - workers.Add(Task.Run(Scraper.StartWorker)); + workers.Add(Task.Run(scraper.StartWorker)); Task.WaitAll(workers.ToArray()); // finalise - Scraper.PrintDownloadStatistics(); - Scraper.WriteIndexFile($"{assetId} asset versions on {DateTime.Now.ToString("R")} ({assetDeliveryInfo.TotalVersions} versions)"); + scraper.PrintDownloadStatistics(); + scraper.WriteIndexFile($"{assetId} asset versions on {DateTime.Now.ToString("R")} ({scraper.TotalVersions} versions)"); } } } \ No newline at end of file diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index a32b18b..55120cd 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -1,138 +1,84 @@ using RobloxUltimateScraper.Enums; using RobloxUltimateScraper.Models; -using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Net; using System.Text; using System.Text.Json; -using System.Threading.Tasks; namespace RobloxUltimateScraper { /// - /// The core + /// Asset ID scraper /// - internal static class Scraper + internal class Scraper { /// - /// Assets to download + /// Successful or failed download event. /// - public static Queue Assets { get; } + public delegate void DownloadFinished(); - /// - /// File extension to be used for saving - /// - public static string? FileExtension { get; set; } = null; + private object _lock = new object(); - /// - /// Should the CDN url be trimmed in the console output - /// - public static bool ShouldTrimCdnUrlInConsole { get; set; } = true; + public ulong AssetId { get; } - /// - /// Compression level - /// - public static int CompressionLevel { get; set; } = 0; + public int TotalVersions { get; private set; } - /// - /// Is index enabled - /// - public static bool IndexEnabled { get { return Config.Default.OutputType == OutputType.Index || Config.Default.OutputType == OutputType.Both; } } + public int CurrentVersion { get; private set; } - /// - /// Are files enabled - /// - public static bool FilesEnabled { get { return Config.Default.OutputType == OutputType.Files || Config.Default.OutputType == OutputType.Both; } } + public string OutputDirectory { get; } /// - /// Is console only + /// File extension to be used for saving /// - public static bool ConsoleOnly { get { return Config.Default.OutputType == OutputType.Console; } } + public string? FileExtension { get; set; } = null; /// /// Versions that successfully downloaded /// - public static int SuccessfulDownloads { get; private set; } + public int SuccessfulDownloads { get; private set; } /// /// Versions that failed to download /// - public static int FailedDownloads { get; private set; } - - /// - /// Successful or failed download event. - /// - public delegate void DownloadFinished(); + public int FailedDownloads { get; private set; } /// /// Event that fires upon a successful or failed download. /// - public static event DownloadFinished? OnDownloadFinished; - - /// - /// singleton. - /// - private static HttpClient _HttpClient { get; } - - /// - /// Http client cookies. - /// - private static CookieContainer _CookieContainer { get; } + public event DownloadFinished? OnDownloadFinished; /// /// Index entries /// - private static List _Index { get; } + private List _index = new List(); /// /// Initialises values used by /// - static Scraper() + public Scraper(ulong assetId, string outputDirectory) { - Assets = new Queue(); - - SuccessfulDownloads = 0; - FailedDownloads = 0; - - _CookieContainer = new CookieContainer(); + AssetId = assetId; + OutputDirectory = outputDirectory; + } - string? cookie = null; + public struct SetupResult + { + public bool Success; + public string Message; + } - if (!string.IsNullOrEmpty(Config.Default.AuthCookie)) - { - Console.WriteLine("Using cookies from arguments."); - cookie = Config.Default.AuthCookie; - } - else - { - string? envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE"); - if (!string.IsNullOrEmpty(envValue)) - { - Console.WriteLine("Using cookies from environment variables."); - cookie = envValue; - } - } - - if (cookie != null) - _CookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}")); + public async Task Setup() + { + var assetDeliveryInfo = await GetAssetDeliveryInformation(); + if (!assetDeliveryInfo.Success) + return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" }; - HttpClientHandler httpClientHandler = new HttpClientHandler - { - AutomaticDecompression = DecompressionMethods.All, - AllowAutoRedirect = false, // we are using v1 because v2 is bad - CookieContainer = _CookieContainer, - UseCookies = true - }; + TotalVersions = assetDeliveryInfo.TotalVersions; - _HttpClient = new HttpClient(httpClientHandler) - { - Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout) - }; - //_HttpClient.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinINet"); + FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension; + Directory.CreateDirectory(OutputDirectory); - _Index = new List(); + return new SetupResult { Success = true }; } /// @@ -141,10 +87,10 @@ static Scraper() /// Asset Id /// Asset Version (0 for latest) /// Http response - public static Task AssetRequest(long id, int version = 0) + public Task AssetRequest(int version = 0) { - string url = $"https://assetdelivery.{Config.Default.BaseUrl}/v1/asset/?id={id}&version={version}"; - return _HttpClient.GetAsync(url); + string url = $"https://assetdelivery.{Config.Default.BaseUrl}/v1/asset/?id={AssetId}&version={version}"; + return Http.Client.GetAsync(url); } /// @@ -182,9 +128,9 @@ public struct AssetDeliveryInformation /// /// Asset Id /// Asset delivery information - public static async Task GetAssetDeliveryInformation(long id) + public async Task GetAssetDeliveryInformation() { - HttpResponseMessage response = await AssetRequest(id); + HttpResponseMessage response = await AssetRequest(); if (response.StatusCode == HttpStatusCode.Conflict) return new AssetDeliveryInformation { Success = false, Error = "Insufficient permissions to download asset" }; @@ -193,8 +139,8 @@ public static async Task GetAssetDeliveryInformation(l return new AssetDeliveryInformation { Success = false, Error = $"Unhandled status code ({(int)response.StatusCode})" }; IEnumerable? values; - int versions = 0; - AssetType assetType = 0; + int versions; + AssetType assetType; { if (!response.Headers.TryGetValues("roblox-assetversionnumber", out values)) @@ -228,9 +174,9 @@ public static async Task GetAssetDeliveryInformation(l /// Asset Id /// Version (0 for latest) /// Success, Error string, CDN url - public static async Task<(bool, string, string)> GetCDNUrl(long id, int version = 0) + public async Task<(bool, string, string)> GetCdnUrl(int version = 0) { - HttpResponseMessage response = await AssetRequest(id, version); + HttpResponseMessage response = await AssetRequest(version); if (response.StatusCode == HttpStatusCode.Conflict) return (false, "Insufficient permissions to download asset", ""); @@ -252,9 +198,9 @@ public static async Task GetAssetDeliveryInformation(l /// Id /// Version /// Asset output path - public static string BuildAssetOutputFileName(long id, int version) + public string BuildAssetOutputFileName(int version) { - string fileName = id.ToString(); + string fileName = AssetId.ToString(); if (version != 0) fileName += $"-v{version}"; @@ -264,13 +210,12 @@ public static string BuildAssetOutputFileName(long id, int version) /// /// Logs an asset to index /// - /// Id /// Version /// CDN url /// File size in Mb /// Last modified /// Error message - private static void LogAsset(long id, + private void LogAsset( int version, string? cdnUrl = null, double? fileSizeInMb = null, @@ -279,7 +224,6 @@ private static void LogAsset(long id, { AssetOutput output = new AssetOutput { - Id = id, Version = version, CDNUrl = cdnUrl, FileSizeInMb = fileSizeInMb, @@ -287,26 +231,21 @@ private static void LogAsset(long id, Error = error }; - Console.WriteLine(output.ToString(trimCdnUrl: ShouldTrimCdnUrlInConsole)); + Console.WriteLine(output.ToString(trimCdnUrl: Config.Default.TrimCdnUrlInConsole)); - _Index.Add(output); + _index.Add(output); } /// /// Logs an asset to index and saves it /// /// Http response messsage - /// Id /// Version /// CDN url - private static async Task LogAssetFromCDNHttpMessageResponse(HttpResponseMessage response, - long id, + private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage response, int version, string cdnUrl) { - if (!ConsoleOnly) - Directory.CreateDirectory(Config.Default.OutputDirectory); - // get last modified string? lastModified = response.Content.Headers.GetValues("last-modified").FirstOrDefault(); @@ -316,20 +255,19 @@ private static async Task LogAssetFromCDNHttpMessageResponse(HttpResponseMessage { fileSize = Math.Round(stream.Length / 1024f / 1024f, 6); - if (FilesEnabled) + if (Config.Default.OutputType.IsFileSavingEnabled()) { - string outputName = BuildAssetOutputFileName(id, version); - string path = Path.Combine(Config.Default.OutputDirectory, outputName); + string outputName = BuildAssetOutputFileName(version); + string path = Path.Combine(OutputDirectory, outputName); string outputPath = FileWriter.BuildOutputFileName(path, FileExtension); DateTime? lastModifiedDT = lastModified != null ? DateTime.Parse(lastModified) : null; - FileWriter.Save(outputPath, stream, CompressionLevel, lastModifiedDT); + FileWriter.Save(outputPath, stream, Config.Default.CompressionLevel, lastModifiedDT); } } LogAsset( - id: id, version: version, cdnUrl: cdnUrl, fileSizeInMb: fileSize, @@ -340,7 +278,7 @@ private static async Task LogAssetFromCDNHttpMessageResponse(HttpResponseMessage /// /// Increments and invokes . /// - private static void FireAssetSuccess() + private void FireAssetSuccess() { SuccessfulDownloads++; OnDownloadFinished?.Invoke(); @@ -349,7 +287,7 @@ private static void FireAssetSuccess() /// /// Increments and invokes . /// - private static void FireAssetFailed() + private void FireAssetFailed() { SuccessfulDownloads++; OnDownloadFinished?.Invoke(); @@ -360,47 +298,48 @@ private static void FireAssetFailed() /// /// Worker // TODO: add try catch blocks. give 3 retries w/ exceptions - public static async Task StartWorker() + public async Task StartWorker() { - while (Assets.Count > 0) + while (TotalVersions > CurrentVersion) { - AssetInput asset; - lock (Assets) + int version; + lock (_lock) { - if (Assets.Count == 0) + if (TotalVersions <= CurrentVersion) continue; - asset = Assets.Dequeue(); + CurrentVersion++; + version = CurrentVersion; } // get the url - (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCDNUrl(asset.Id, asset.Version); + (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCdnUrl(version); if (!cdnGetSuccess) { - LogAsset(error: $"Failed to fetch {asset.Id} v{asset.Version}: {cdnGetMessage}", id: asset.Id, version: asset.Version); + LogAsset(error: $"Failed to fetch {AssetId} v{version}: {cdnGetMessage}", version: version); FireAssetFailed(); continue; } // download the asset - HttpResponseMessage cdnResponse = await _HttpClient.GetAsync(cdnUrl); + HttpResponseMessage cdnResponse = await Http.Client.GetAsync(cdnUrl); if (cdnResponse.StatusCode == HttpStatusCode.Forbidden) { - LogAsset(error: $"Failed to fetch {asset.Id} v{asset.Version} ({cdnUrl}): Asset not found on CDN", id: asset.Id, version: asset.Version); + LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Asset not found on CDN", version: version); FireAssetFailed(); continue; } if (!IsSuccessStatusCode(cdnResponse.StatusCode)) { - LogAsset(error: $"Failed to fetch {asset.Id} v{asset.Version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", id: asset.Id, version: asset.Version); + LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", version: version); FireAssetFailed(); continue; } // save! - await LogAssetFromCDNHttpMessageResponse(cdnResponse, asset.Id, asset.Version, cdnUrl); + await LogAssetFromCdnHttpMessageResponse(cdnResponse, version, cdnUrl); FireAssetSuccess(); } } @@ -408,7 +347,7 @@ public static async Task StartWorker() /// /// Prints download statistics /// - public static void PrintDownloadStatistics() + public void PrintDownloadStatistics() { Console.WriteLine($"Successful Downloads: {SuccessfulDownloads}"); Console.WriteLine($"Failed Downloads: {FailedDownloads}"); @@ -419,15 +358,15 @@ public static void PrintDownloadStatistics() /// Writes the index file /// /// Index header - public static void WriteIndexFile(string header) + public void WriteIndexFile(string header) { - if (!IndexEnabled) + if (!Config.Default.OutputType.IsIndexEnabled()) return; - Directory.CreateDirectory(Config.Default.OutputDirectory); + Directory.CreateDirectory(OutputDirectory); // sort index values - _Index.Sort(); + _index.Sort(); List indexPaths = new List(); @@ -438,12 +377,12 @@ public static void WriteIndexFile(string header) builder.AppendLine(header); - foreach (AssetOutput asset in _Index) + foreach (AssetOutput asset in _index) builder.AppendLine(asset.ToString()); string contents = builder.ToString(); - string path = Path.Combine(Config.Default.OutputDirectory, "index.txt"); + string path = Path.Combine(OutputDirectory, "index.txt"); indexPaths.Add(path); File.WriteAllText(path, contents); @@ -451,9 +390,9 @@ public static void WriteIndexFile(string header) if (Config.Default.IndexType == IndexType.Json || Config.Default.IndexType == IndexType.All) { - string contents = JsonSerializer.Serialize(_Index); + string contents = JsonSerializer.Serialize(_index); - string path = Path.Combine(Config.Default.OutputDirectory, "index.json"); + string path = Path.Combine(OutputDirectory, "index.json"); indexPaths.Add(path); File.WriteAllText(path, contents); From 2194bd991c96faec977b51007b493cb35e5fe285 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sun, 17 Aug 2025 22:27:26 +0100 Subject: [PATCH 02/26] add range scraper --- RobloxUltimateScraper/Program.cs | 93 +++++++++++++++++-- .../Properties/launchSettings.json | 4 + RobloxUltimateScraper/RangeScraper.cs | 79 ++++++++++++++++ RobloxUltimateScraper/Scraper.cs | 37 +++++--- 4 files changed, 193 insertions(+), 20 deletions(-) create mode 100644 RobloxUltimateScraper/RangeScraper.cs diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index b033ba2..f5c505b 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -7,7 +7,7 @@ namespace RobloxUltimateScraper { internal class Program { - static async Task Main(string[] args) + static void Main(string[] args) { if (args.Length == 0) // make it display the help menu args = new string[] { "--help" }; @@ -32,7 +32,6 @@ static void Run(CommandLineConfig config) // TODO: add functionality for // list // list versions - // range switch (Config.Default.Scraper) { case ScraperType.None: @@ -53,7 +52,7 @@ static void Run(CommandLineConfig config) break; case ScraperType.Range: - Console.WriteLine("Range scraper has not been implemented yet."); + RunRangeScraper(); break; default: @@ -96,7 +95,7 @@ static void RunAssetScraper() string outputDirectory; - if (string.IsNullOrEmpty(Config.Default.OutputDirectory) && Config.Default.OutputType != OutputType.Console) + if (string.IsNullOrEmpty(Config.Default.OutputDirectory)) outputDirectory = $"Asset_{assetId}"; else outputDirectory = Config.Default.OutputDirectory; @@ -108,7 +107,7 @@ static void RunAssetScraper() // set up titles SetAssetScraperTitle(assetId, 0, 0, scraper.TotalVersions); - scraper.OnDownloadFinished += () => SetAssetScraperTitle(assetId, scraper.SuccessfulDownloads, scraper.FailedDownloads, scraper.TotalVersions); + scraper.OnDownloadFinished += (_) => SetAssetScraperTitle(assetId, scraper.SuccessfulDownloads, scraper.FailedDownloads, scraper.TotalVersions); // start workers List workers = new List(); @@ -120,7 +119,89 @@ static void RunAssetScraper() // finalise scraper.PrintDownloadStatistics(); - scraper.WriteIndexFile($"{assetId} asset versions on {DateTime.Now.ToString("R")} ({scraper.TotalVersions} versions)"); + scraper.WriteIndexFile(); + } + + class RangeScraperData + { + public ulong StartRange = 0; + public ulong EndRange = 0; + + public ulong TotalIds = 0; + public int DownloadedIds = 0; + public int ErrorIds = 0; + + public int TotalVersions = 0; + public int DownloadedVersions = 0; + public int ErrorVersions = 0; + } + + static async Task RangeScraperTitleLogic(RangeScraperData data, CancellationToken token) + { + while (!token.IsCancellationRequested) + { + Console.Title = $"RobloxUltimateScraper | Range {data.StartRange}-{data.EndRange} | " + + $"{data.DownloadedIds}/{data.TotalIds} IDs ({data.ErrorIds} errors) | " + + $"{data.DownloadedIds}/{data.TotalVersions} Versions ({data.ErrorVersions} errors)"; + + try + { + await Task.Delay(1000, token); + } + catch (TaskCanceledException) + { + return; + } + } + } + + static void RunRangeScraper() + { + string outputDirectory; + + if (string.IsNullOrEmpty(Config.Default.OutputDirectory)) + outputDirectory = $"Range_{DateTimeOffset.UtcNow.ToUnixTimeSeconds()}"; + else + outputDirectory = Config.Default.OutputDirectory; + + if (Config.Default.OutputType != OutputType.Console) + Directory.CreateDirectory(outputDirectory); + + RangeScraperData data = new RangeScraperData() + { + StartRange = Config.Default.ScraperStartRange, + EndRange = Config.Default.ScraperEndRange, + TotalIds = Config.Default.ScraperEndRange - Config.Default.ScraperStartRange + }; + + RangeScraper scraper = new RangeScraper(Config.Default.ScraperStartRange, Config.Default.ScraperEndRange, outputDirectory); + + // set up titles + scraper.OnAssetDownloadFinished += (bool success) => { Interlocked.Increment(ref data.DownloadedVersions); if (!success) { Interlocked.Increment(ref data.ErrorVersions); } }; + scraper.OnAssetVersionsDiscovered += (int versions) => { Interlocked.Add(ref data.TotalVersions, versions); }; + + scraper.OnAssetFinished += () => Interlocked.Increment(ref data.DownloadedIds); + scraper.OnAssetError += () => { Interlocked.Increment(ref data.DownloadedIds); Interlocked.Increment(ref data.ErrorIds); }; + + CancellationTokenSource cts = new CancellationTokenSource(); + Task titleUpdateTask = Task.Run(() => RangeScraperTitleLogic(data, cts.Token)); + + // start workers + List workers = new List(); + + for (int i = 1; i <= Config.Default.Workers; i++) + workers.Add(Task.Run(scraper.StartWorker)); + + Task.WaitAll(workers.ToArray()); + cts.Cancel(); + + Console.WriteLine($"ID Range: {data.StartRange}-{data.EndRange}"); + Console.WriteLine($"Total IDs: {data.TotalIds}"); + Console.WriteLine($"Successful ID Downloads: {data.DownloadedIds}"); + Console.WriteLine($"Failed ID Downloads: {data.ErrorIds}"); + Console.WriteLine($"Total Versions: {data.TotalVersions}"); + Console.WriteLine($"Successful Version Downloads: {data.DownloadedVersions}"); + Console.WriteLine($"Failed Version Downloads: {data.ErrorVersions}"); } } } \ No newline at end of file diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json index 178e2e5..fc68c1c 100644 --- a/RobloxUltimateScraper/Properties/launchSettings.json +++ b/RobloxUltimateScraper/Properties/launchSettings.json @@ -6,6 +6,10 @@ "RobloxUltimateScraper - Asset Scraper": { "commandName": "Project", "commandLineArgs": "-w 30\r\n-c Zstd\r\n--cl 15\r\n-d Crossroads\r\n-o Both\r\n-a 1818\r\n-i All" + }, + "RobloxUltimateScraper - Range Scraper": { + "commandName": "Project", + "commandLineArgs": "-w 10\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All" } } } \ No newline at end of file diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs new file mode 100644 index 0000000..673c048 --- /dev/null +++ b/RobloxUltimateScraper/RangeScraper.cs @@ -0,0 +1,79 @@ +namespace RobloxUltimateScraper +{ + /// + /// Range scraper + /// + internal class RangeScraper + { + public delegate void AssetDownloadFinished(bool success); // Once a version is finished downloading + public delegate void AssetVersionsDiscovered(int versions); // Once asset scraper returns total asset versions + public delegate void AssetFinished(); // Once an ID is finished downloading + public delegate void AssetError(); // Once asset scraper setup errors + + public event AssetDownloadFinished? OnAssetDownloadFinished; + public event AssetVersionsDiscovered? OnAssetVersionsDiscovered; + public event AssetFinished? OnAssetFinished; + public event AssetError? OnAssetError; + + public ulong StartRange { get; } + public ulong EndRange { get; } + + public ulong CurrentId { get; private set; } + + public string OutputDirectory { get; } + + private object _lock = new object(); + + public RangeScraper(ulong startRange, ulong endRange, string outputDirectory) + { + StartRange = startRange; + EndRange = endRange; + + CurrentId = startRange - 1; + + OutputDirectory = outputDirectory; + } + + public async Task StartWorker() + { + while (EndRange > CurrentId) + { + ulong id; + lock (_lock) + { + if (EndRange <= CurrentId) + continue; + CurrentId++; + id = CurrentId; + } + + string outputDirectory = Path.Combine(OutputDirectory, $"Asset_{id}"); + if (Directory.Exists(outputDirectory) && File.Exists(Path.Combine(outputDirectory, "index.txt"))) // index.txt is an indication that the download was finished. does not work for non-index runs. + { + Console.WriteLine($"Skipping {id} - already done. Delete the directory to redo the download."); + OnAssetFinished?.Invoke(); + continue; + } + + Scraper scraper = new Scraper(id, outputDirectory); + var result = await scraper.Setup(); + if (!result.Success) + { + Console.WriteLine($"Failed to download {id} ({result.Message})"); + OnAssetError?.Invoke(); + continue; + } + OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions); + + scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success); + await scraper.StartWorker(); + + scraper.PrintDownloadStatistics(); + scraper.WriteIndexFile(); + + Console.WriteLine($"{id} has been completed."); + OnAssetFinished?.Invoke(); + } + } + } +} diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 55120cd..dcc27f4 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -15,7 +15,7 @@ internal class Scraper /// /// Successful or failed download event. /// - public delegate void DownloadFinished(); + public delegate void DownloadFinished(bool success); private object _lock = new object(); @@ -32,15 +32,17 @@ internal class Scraper /// public string? FileExtension { get; set; } = null; + private int _successfulDownloads; /// /// Versions that successfully downloaded /// - public int SuccessfulDownloads { get; private set; } + public int SuccessfulDownloads => _successfulDownloads; + private int _failedDownloads; /// /// Versions that failed to download /// - public int FailedDownloads { get; private set; } + public int FailedDownloads => _failedDownloads; /// /// Event that fires upon a successful or failed download. @@ -180,6 +182,8 @@ public async Task GetAssetDeliveryInformation() if (response.StatusCode == HttpStatusCode.Conflict) return (false, "Insufficient permissions to download asset", ""); + else if (response.StatusCode == HttpStatusCode.Forbidden) + return (false, "Asset version has been deleted", ""); if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", ""); @@ -280,8 +284,8 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon /// private void FireAssetSuccess() { - SuccessfulDownloads++; - OnDownloadFinished?.Invoke(); + Interlocked.Increment(ref _successfulDownloads); + OnDownloadFinished?.Invoke(true); } /// @@ -289,8 +293,8 @@ private void FireAssetSuccess() /// private void FireAssetFailed() { - SuccessfulDownloads++; - OnDownloadFinished?.Invoke(); + Interlocked.Increment(ref _failedDownloads); + OnDownloadFinished?.Invoke(false); } /// @@ -330,8 +334,13 @@ public async Task StartWorker() FireAssetFailed(); continue; } - - if (!IsSuccessStatusCode(cdnResponse.StatusCode)) + else if (cdnResponse.StatusCode == HttpStatusCode.TooManyRequests) + { + LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Too many requests", version: version); + FireAssetFailed(); + continue; + } + else if (!IsSuccessStatusCode(cdnResponse.StatusCode)) { LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", version: version); FireAssetFailed(); @@ -349,16 +358,16 @@ public async Task StartWorker() /// public void PrintDownloadStatistics() { - Console.WriteLine($"Successful Downloads: {SuccessfulDownloads}"); - Console.WriteLine($"Failed Downloads: {FailedDownloads}"); - Console.WriteLine($"Total Downloads: {SuccessfulDownloads + FailedDownloads}"); + Console.WriteLine($"{AssetId} | Successful Downloads: {SuccessfulDownloads}"); + Console.WriteLine($"{AssetId} | Failed Downloads: {FailedDownloads}"); + Console.WriteLine($"{AssetId} | Total Downloads: {SuccessfulDownloads + FailedDownloads}"); } /// /// Writes the index file /// /// Index header - public void WriteIndexFile(string header) + public void WriteIndexFile() { if (!Config.Default.OutputType.IsIndexEnabled()) return; @@ -375,7 +384,7 @@ public void WriteIndexFile(string header) // create index file contents StringBuilder builder = new StringBuilder(); - builder.AppendLine(header); + builder.AppendLine($"{AssetId} asset versions on {DateTime.Now.ToString("R")} ({TotalVersions} versions)"); foreach (AssetOutput asset in _index) builder.AppendLine(asset.ToString()); From 1d7025f056e41f5c59fba5d8eea2e5b00e4611e9 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sun, 17 Aug 2025 22:27:57 +0100 Subject: [PATCH 03/26] add back roblox user-agent --- RobloxUltimateScraper/Http.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/Http.cs b/RobloxUltimateScraper/Http.cs index a81be3d..3b3ffc7 100644 --- a/RobloxUltimateScraper/Http.cs +++ b/RobloxUltimateScraper/Http.cs @@ -41,7 +41,7 @@ static Http() { Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout) }; - //_HttpClient.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinINet"); + Client.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinInet"); } } } From 17344aa1c9c254e80f4855bd5e1d63e8fa5265f3 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sun, 17 Aug 2025 22:30:06 +0100 Subject: [PATCH 04/26] update AssetType enum --- RobloxUltimateScraper/Enums/AssetType.cs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/RobloxUltimateScraper/Enums/AssetType.cs b/RobloxUltimateScraper/Enums/AssetType.cs index d3006fb..be4bb62 100644 --- a/RobloxUltimateScraper/Enums/AssetType.cs +++ b/RobloxUltimateScraper/Enums/AssetType.cs @@ -1,8 +1,5 @@ namespace RobloxUltimateScraper.Enums { - /// - /// Automatically generated, do not modify. - /// internal enum AssetType { Product = 0, @@ -81,7 +78,10 @@ internal enum AssetType CodeSnippet = 80, AdsVideo = 81, OtaUpdate = 82, - Screenshot = 83 + Screenshot = 83, + RuntimePropertySet = 84, + StorePreviewVideo = 85, + GamePreviewVideo = 86 } internal static class AssetTypeEx @@ -164,7 +164,10 @@ internal static class AssetTypeEx [AssetType.CodeSnippet] = null, [AssetType.AdsVideo] = null, [AssetType.OtaUpdate] = null, - [AssetType.Screenshot] = null + [AssetType.Screenshot] = null, + [AssetType.RuntimePropertySet] = null, + [AssetType.StorePreviewVideo] = null, + [AssetType.GamePreviewVideo] = null }; public static string? GetExtension(this AssetType type) From 1a7537e1685f7dd2af374ec3a10cefe2e5215d29 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sun, 17 Aug 2025 22:31:57 +0100 Subject: [PATCH 05/26] bump zstdsharp version --- RobloxUltimateScraper/RobloxUltimateScraper.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj index 7195b27..5d7b5a4 100644 --- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj +++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj @@ -1,4 +1,4 @@ - + Exe @@ -12,7 +12,7 @@ - + From 0bde15bef061268ccee7557cb92c75ad2920971b Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Mon, 6 Oct 2025 21:53:58 +0100 Subject: [PATCH 06/26] update AssetType enum and extension map --- RobloxUltimateScraper/Enums/AssetType.cs | 26 +++++++++++++----------- RobloxUltimateScraper/Scraper.cs | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/RobloxUltimateScraper/Enums/AssetType.cs b/RobloxUltimateScraper/Enums/AssetType.cs index be4bb62..7a0f1a6 100644 --- a/RobloxUltimateScraper/Enums/AssetType.cs +++ b/RobloxUltimateScraper/Enums/AssetType.cs @@ -2,7 +2,7 @@ { internal enum AssetType { - Product = 0, + Unknown = 0, Image = 1, TShirt = 2, Audio = 3, @@ -81,14 +81,15 @@ internal enum AssetType Screenshot = 83, RuntimePropertySet = 84, StorePreviewVideo = 85, - GamePreviewVideo = 86 + GamePreviewVideo = 86, + CreatorExperienceConfig = 87 } internal static class AssetTypeEx { private static readonly Dictionary _extensionMap = new Dictionary() { - [AssetType.Product] = null, + [AssetType.Unknown] = null, [AssetType.Image] = "png", // TODO: auto detect what type of image it is [AssetType.TShirt] = "rbxm", [AssetType.Audio] = "ogg", @@ -119,7 +120,7 @@ internal static class AssetTypeEx [AssetType.Package] = "txt", [AssetType.YouTubeVideo] = null, [AssetType.GamePass] = null, - [AssetType.App] = "rbxm", + [AssetType.App] = "rbxl", [AssetType.Code] = null, [AssetType.Plugin] = "rbxm", [AssetType.SolidModel] = "rbxm", @@ -140,11 +141,11 @@ internal static class AssetTypeEx [AssetType.SwimAnimation] = "rbxm", [AssetType.WalkAnimation] = "rbxm", [AssetType.PoseAnimation] = "rbxm", - [AssetType.LocalizationTableManifest] = null, - [AssetType.LocalizationTableTranslation] = null, + [AssetType.LocalizationTableManifest] = "json", + [AssetType.LocalizationTableTranslation] = "json", [AssetType.EmoteAnimation] = "rbxm", [AssetType.Video] = null, - [AssetType.TexturePack] = null, + [AssetType.TexturePack] = "xml", [AssetType.TShirtAccessory] = "rbxm", [AssetType.ShirtAccessory] = "rbxm", [AssetType.PantsAccessory] = "rbxm", @@ -154,20 +155,21 @@ internal static class AssetTypeEx [AssetType.LeftShoeAccessory] = "rbxm", [AssetType.RightShoeAccessory] = "rbxm", [AssetType.DressSkirtAccessory] = "rbxm", - [AssetType.FontFamily] = null, - [AssetType.FontFace] = null, - [AssetType.MeshHiddenSurfaceRemoval] = null, + [AssetType.FontFamily] = "json", + [AssetType.FontFace] = "ttf", + [AssetType.MeshHiddenSurfaceRemoval] = "rbxm", [AssetType.EyebrowAccessory] = "rbxm", [AssetType.EyelashAccessory] = "rbxm", [AssetType.MoodAnimation] = "rbxm", [AssetType.DynamicHead] = "rbxm", [AssetType.CodeSnippet] = null, [AssetType.AdsVideo] = null, - [AssetType.OtaUpdate] = null, + [AssetType.OtaUpdate] = "rbxm", [AssetType.Screenshot] = null, [AssetType.RuntimePropertySet] = null, [AssetType.StorePreviewVideo] = null, - [AssetType.GamePreviewVideo] = null + [AssetType.GamePreviewVideo] = null, + [AssetType.CreatorExperienceConfig] = null }; public static string? GetExtension(this AssetType type) diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index dcc27f4..4c9c583 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -163,7 +163,7 @@ public async Task GetAssetDeliveryInformation() if (!Enum.TryParse(versionsStr, out assetType)) { Debug.Assert(false); - assetType = AssetType.Product; + assetType = AssetType.Unknown; } } From 2313f236b89e9d0dd195fe791c1e8089ce9f4b92 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Mon, 6 Oct 2025 21:54:52 +0100 Subject: [PATCH 07/26] fix last-modified crash --- RobloxUltimateScraper/Scraper.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 4c9c583..259d08f 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -251,7 +251,9 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon string cdnUrl) { // get last modified - string? lastModified = response.Content.Headers.GetValues("last-modified").FirstOrDefault(); + string? lastModified = null; + if (response.Content.Headers.TryGetValues("last-modified", out IEnumerable? lastModifiedValues)) + lastModified = lastModifiedValues.First(); double? fileSize = null; From 3e20639ecc459b72bc3c92da022c065af51b32c2 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Mon, 6 Oct 2025 21:55:22 +0100 Subject: [PATCH 08/26] use doubles when calculating the file size --- RobloxUltimateScraper/Scraper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 259d08f..c021030 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -259,7 +259,7 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon using (Stream stream = await response.Content.ReadAsStreamAsync()) { - fileSize = Math.Round(stream.Length / 1024f / 1024f, 6); + fileSize = Math.Round(stream.Length / 1024.0 / 1024.0, 6); if (Config.Default.OutputType.IsFileSavingEnabled()) { From 3a849c29f7cf993a9e9ce9a1856b3c01a8b9d757 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Mon, 6 Oct 2025 21:57:45 +0100 Subject: [PATCH 09/26] remove documentation for removed arguments --- RobloxUltimateScraper/Scraper.cs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index c021030..27fe1e8 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -86,7 +86,6 @@ public async Task Setup() /// /// Creates a request to https://assetdelivery.roblox.com/v1/asset/ /// - /// Asset Id /// Asset Version (0 for latest) /// Http response public Task AssetRequest(int version = 0) @@ -128,7 +127,6 @@ public struct AssetDeliveryInformation /// /// Retrieves information from asset delivery /// - /// Asset Id /// Asset delivery information public async Task GetAssetDeliveryInformation() { @@ -173,7 +171,6 @@ public async Task GetAssetDeliveryInformation() /// /// Retrieves the CDN url from an asset id /// - /// Asset Id /// Version (0 for latest) /// Success, Error string, CDN url public async Task<(bool, string, string)> GetCdnUrl(int version = 0) @@ -199,7 +196,6 @@ public async Task GetAssetDeliveryInformation() /// /// Constructs the asset output path /// - /// Id /// Version /// Asset output path public string BuildAssetOutputFileName(int version) @@ -368,7 +364,6 @@ public void PrintDownloadStatistics() /// /// Writes the index file /// - /// Index header public void WriteIndexFile() { if (!Config.Default.OutputType.IsIndexEnabled()) From 10362ca3f2c6fb4ad74494dfc0cb09dcfa8574ac Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:45:47 +0100 Subject: [PATCH 10/26] update roblox auth loading --- RobloxUltimateScraper/CommandLineConfig.cs | 12 +++ RobloxUltimateScraper/Config.cs | 6 ++ .../Enums/RobloxAuthStatus.cs | 25 +++++ RobloxUltimateScraper/Http.cs | 92 ++++++++++++++----- RobloxUltimateScraper/Program.cs | 82 ++++++++++++++++- 5 files changed, 191 insertions(+), 26 deletions(-) create mode 100644 RobloxUltimateScraper/Enums/RobloxAuthStatus.cs diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index e53ba6a..cd34edd 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -194,5 +194,17 @@ public string BaseUrl [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")] public bool? TrimCdnUrlInConsole { get; set; } + + /// + /// Disables checks responsible for checking if the current run is authenticated. + /// + [Option("disablerobloxauthchecks", Required = false, Default = false, HelpText = "Disables checks responsible for checking if the current run is authenticated.")] + public bool DisableRobloxAuthChecks { get; set; } = false; + + /// + /// Should fail if the current run is unauthenticated? + /// + [Option("failifunauthenticated", Required = false, Default = false, HelpText = "Should fail if the current run is unauthenticated?")] + public bool FailIfUnauthenticated { get; set; } = false; } } diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index 6ed8bf9..90568ab 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -35,6 +35,12 @@ internal class Config public bool TrimCdnUrlInConsole { get; } + /// + public bool DisableRobloxAuthChecks => _clConfig.DisableRobloxAuthChecks; + + /// + public bool FailIfUnauthenticated => _clConfig.FailIfUnauthenticated; + public Config(CommandLineConfig config) { _clConfig = config; diff --git a/RobloxUltimateScraper/Enums/RobloxAuthStatus.cs b/RobloxUltimateScraper/Enums/RobloxAuthStatus.cs new file mode 100644 index 0000000..363505b --- /dev/null +++ b/RobloxUltimateScraper/Enums/RobloxAuthStatus.cs @@ -0,0 +1,25 @@ +namespace RobloxUltimateScraper.Enums +{ + internal enum RobloxAuthStatus + { + /// + /// The auth cookie being used is valid + /// + Authenticated, + + /// + /// No authentication cookie is set + /// + Unauthenticated, + + /// + /// The auth cookie being used is invalid + /// + InvalidAuth, + + /// + /// An error occured while validating + /// + Error + } +} diff --git a/RobloxUltimateScraper/Http.cs b/RobloxUltimateScraper/Http.cs index 3b3ffc7..d5e709c 100644 --- a/RobloxUltimateScraper/Http.cs +++ b/RobloxUltimateScraper/Http.cs @@ -1,47 +1,89 @@ -using System.Net; +using System.Diagnostics; +using System.Net; namespace RobloxUltimateScraper { internal static class Http { - public static HttpClient Client { get; } + private static CookieContainer _cookieContainer = null!; - static Http() - { - CookieContainer cookieContainer = new CookieContainer(); - string? cookie = null; + /// + /// A singleton that can be shared across all threads + /// + public static HttpClient Client { get; private set; } = null!; - if (!string.IsNullOrEmpty(Config.Default.AuthCookie)) - { - Console.WriteLine("Using cookies from arguments."); - cookie = Config.Default.AuthCookie; - } - else - { - string? envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE"); - if (!string.IsNullOrEmpty(envValue)) - { - Console.WriteLine("Using cookies from environment variables."); - cookie = envValue; - } - } + /// + /// Boolean that indicates if a .ROBLOSECURITY cookie has been set for this session + /// + public static bool HasRobloxAuth { get; private set; } = false; - if (cookie != null) - cookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}")); + /// + /// Creates a new with the appropriate settings for this scraper + /// + /// New HttpClient instance + public static HttpClient CreateClient() + { + Debug.Assert(_cookieContainer != null); HttpClientHandler httpClientHandler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All, AllowAutoRedirect = false, // we are using v1 because v2 is bad - CookieContainer = cookieContainer, + CookieContainer = _cookieContainer, UseCookies = true }; - Client = new HttpClient(httpClientHandler) + HttpClient client = new HttpClient(httpClientHandler) { Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout) }; - Client.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinInet"); + client.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinInet"); + + return client; + } + + private static string? GetRobloxAuthCookie() + { + if (!string.IsNullOrEmpty(Config.Default.AuthCookie)) + { + Console.WriteLine("Using auth cookie from arguments."); + return Config.Default.AuthCookie; + } + + string? envValue; + envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE"); + if (!string.IsNullOrEmpty(envValue)) + { + Console.WriteLine("Using auth cookie from environment variables (ROBLOXULTIMATESCRAPER_COOKIE)."); + return envValue; + } + + envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE_PATH"); + if (!string.IsNullOrEmpty(envValue)) + { + Console.WriteLine("Using auth cookie from environment variables (ROBLOXULTIMATESCRAPER_COOKIE_PATH)."); + + if (!File.Exists(envValue)) + throw new ApplicationException($"Can not read the auth cookie: File {envValue} does not exist."); + + return File.ReadAllText(envValue); + } + + return null; + } + + public static void Init() + { + _cookieContainer = new CookieContainer(); + + string? cookie = GetRobloxAuthCookie(); + if (cookie != null) + { + _cookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}")); + HasRobloxAuth = true; + } + + Client = CreateClient(); } } } diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index f5c505b..b376ed7 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -1,6 +1,7 @@ using CommandLine; using CommandLine.Text; using RobloxUltimateScraper.Enums; +using System.Net; using System.Reflection; namespace RobloxUltimateScraper @@ -29,13 +30,27 @@ static void Run(CommandLineConfig config) Config.Initialise(config); + try + { + Http.Init(); + } + catch (Exception ex) + { + Console.WriteLine($"[ERROR]: Failed to initialise HTTP ({ex})"); + Environment.Exit(1); + return; + } + + if (!Config.Default.DisableRobloxAuthChecks) + CheckRobloxAuthStatus(); + // TODO: add functionality for // list // list versions switch (Config.Default.Scraper) { case ScraperType.None: - Console.WriteLine("No scraper chosen."); + Console.WriteLine("No scraper selected."); Console.WriteLine("Run the scraper with the --help argument for all commands."); break; @@ -71,6 +86,71 @@ static void Error(ParserResult config, IEnumerable err Console.WriteLine(text); } + static void CheckRobloxAuthStatus() + { + RobloxAuthStatus status = GetRobloxAuthStatus(out int httpCode, out Exception? exception); + + switch (status) + { + case RobloxAuthStatus.Unauthenticated: + Console.WriteLine("[WARNING]: No authentication is set for this run. You may face problems downloading assets not authored by Roblox."); + break; + + case RobloxAuthStatus.InvalidAuth: + Console.WriteLine("[ERROR]: Provided authentication cookie is invalid."); + Environment.Exit(1); + return; + + case RobloxAuthStatus.Error: + if (exception != null) + { + Console.WriteLine($"[WARNING]: Failed to check if authentication is valid ({exception.Message})."); + Console.WriteLine(exception); + } + else + { + Console.WriteLine($"[WARNING]: Failed to check if authentication is valid (got unexpected HTTP code {httpCode})."); + } + break; + + } + + if (status != RobloxAuthStatus.Authenticated && Config.Default.FailIfUnauthenticated) + { + Console.WriteLine("[ERROR]: Fail if unauthenticated flag is enabled. Stopping execution."); + Environment.Exit(1); + return; + } + } + + static RobloxAuthStatus GetRobloxAuthStatus(out int httpCode, out Exception? exception) + { + httpCode = 0; + exception = null; + + if (!Http.HasRobloxAuth) + return RobloxAuthStatus.Unauthenticated; + + try + { + HttpResponseMessage message = Http.Client.GetAsync("https://users.roblox.com/v1/users/authenticated").Result; + + httpCode = (int)message.StatusCode; + + return message.StatusCode switch + { + HttpStatusCode.OK => RobloxAuthStatus.Authenticated, + HttpStatusCode.Unauthorized => RobloxAuthStatus.InvalidAuth, + _ => RobloxAuthStatus.Error + }; + } + catch (Exception ex) + { + exception = ex; + return RobloxAuthStatus.Error; + } + } + // TODO: move asset scraper to a separate file /// From 4670cb8439c219114e321efd5c02685a42881eab Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:29:26 +0100 Subject: [PATCH 11/26] improve error handling in scraper --- RobloxUltimateScraper/Scraper.cs | 91 ++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 27 deletions(-) diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 27fe1e8..93071c5 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -175,22 +175,69 @@ public async Task GetAssetDeliveryInformation() /// Success, Error string, CDN url public async Task<(bool, string, string)> GetCdnUrl(int version = 0) { - HttpResponseMessage response = await AssetRequest(version); + try + { + HttpResponseMessage response = await AssetRequest(version); - if (response.StatusCode == HttpStatusCode.Conflict) - return (false, "Insufficient permissions to download asset", ""); - else if (response.StatusCode == HttpStatusCode.Forbidden) - return (false, "Asset version has been deleted", ""); + switch (response.StatusCode) + { + case HttpStatusCode.Conflict: + return (false, "Insufficient permissions to download asset", ""); - if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download - return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", ""); + case HttpStatusCode.Forbidden: + return (false, "Asset version has been deleted", ""); + + case HttpStatusCode.TooManyRequests: + return (false, "Too many requests", ""); + } + + if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download + return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", ""); + + if (!response.Headers.TryGetValues("Location", out IEnumerable? values)) + return (false, "Location header is missing", ""); // this should never happen, but handle anyways + + string location = values.First(); + + return (true, "Success", location); + } + catch (Exception ex) + { + return (false, ex.ToString(), ""); + } + } + + /// + /// Gets content from the CDN using a specified URL + /// + /// CDN Url + /// Success, Error Message, HttpResponseMessage + public async Task<(bool, string, HttpResponseMessage?)> GetCdnContent(string url) + { + try + { + HttpResponseMessage response = await Http.Client.GetAsync(url); - if (!response.Headers.TryGetValues("Location", out IEnumerable? values)) - return (false, "Location header is missing", ""); // this should never happen, but handle anyways + switch (response.StatusCode) + { + case HttpStatusCode.Forbidden: + return (false, "Asset not found on CDN", null); - string location = values.First(); + case HttpStatusCode.TooManyRequests: + return (false, "Too many requests", null); + + default: + if (!IsSuccessStatusCode(response.StatusCode)) + return (false, $"Unknown status code ({(int)response.StatusCode})", null); + break; + } - return (true, "Success", location); + return (true, "Success", response); + } + catch (Exception ex) + { + return (false, ex.ToString(), null); + } } /// @@ -324,29 +371,19 @@ public async Task StartWorker() } // download the asset - HttpResponseMessage cdnResponse = await Http.Client.GetAsync(cdnUrl); + (bool cdnDownloadSuccess, string cdnDownloadMessage, HttpResponseMessage? cdnDownloadResponse) = await GetCdnContent(cdnUrl); - if (cdnResponse.StatusCode == HttpStatusCode.Forbidden) + if (!cdnDownloadSuccess) { - LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Asset not found on CDN", version: version); - FireAssetFailed(); - continue; - } - else if (cdnResponse.StatusCode == HttpStatusCode.TooManyRequests) - { - LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Too many requests", version: version); - FireAssetFailed(); - continue; - } - else if (!IsSuccessStatusCode(cdnResponse.StatusCode)) - { - LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", version: version); + LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): {cdnDownloadMessage}", version: version); FireAssetFailed(); continue; } + Debug.Assert(cdnDownloadResponse != null); + // save! - await LogAssetFromCdnHttpMessageResponse(cdnResponse, version, cdnUrl); + await LogAssetFromCdnHttpMessageResponse(cdnDownloadResponse, version, cdnUrl); FireAssetSuccess(); } } From 678cc97d831258d1be59f653dcd033afdac35649 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:40:50 +0100 Subject: [PATCH 12/26] ability to use multiple httpclients --- RobloxUltimateScraper/CommandLineConfig.cs | 7 +++++ RobloxUltimateScraper/Config.cs | 4 +++ .../Properties/launchSettings.json | 4 +-- RobloxUltimateScraper/Scraper.cs | 31 ++++++++++++------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index cd34edd..3aaa156 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -206,5 +206,12 @@ public string BaseUrl /// [Option("failifunauthenticated", Required = false, Default = false, HelpText = "Should fail if the current run is unauthenticated?")] public bool FailIfUnauthenticated { get; set; } = false; + + [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.3.0 and before. Other name: --shc.")] + public bool SingleHttpClient { get; set; } = false; + + // hack... again!!! + [Option("shc", Required = false, Hidden = true)] + public bool? SingleHttpClientOtherName { get; set; } } } diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index 90568ab..45a9ece 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -41,12 +41,16 @@ internal class Config /// public bool FailIfUnauthenticated => _clConfig.FailIfUnauthenticated; + /// + public bool SingleHttpClient { get; } + public Config(CommandLineConfig config) { _clConfig = config; CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg; TrimCdnUrlInConsole = _clConfig.TrimCdnUrlInConsole ?? OutputType != OutputType.Console; + SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient; } public static void Initialise(CommandLineConfig commandLineConfig) diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json index fc68c1c..c1cf511 100644 --- a/RobloxUltimateScraper/Properties/launchSettings.json +++ b/RobloxUltimateScraper/Properties/launchSettings.json @@ -5,11 +5,11 @@ }, "RobloxUltimateScraper - Asset Scraper": { "commandName": "Project", - "commandLineArgs": "-w 30\r\n-c Zstd\r\n--cl 15\r\n-d Crossroads\r\n-o Both\r\n-a 1818\r\n-i All" + "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 15\r\n-d Crossroads\r\n-o Both\r\n-a 1818\r\n-i All" }, "RobloxUltimateScraper - Range Scraper": { "commandName": "Project", - "commandLineArgs": "-w 10\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All" + "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All" } } } \ No newline at end of file diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 93071c5..0a964bd 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -71,7 +71,7 @@ public struct SetupResult public async Task Setup() { - var assetDeliveryInfo = await GetAssetDeliveryInformation(); + var assetDeliveryInfo = await GetAssetDeliveryInformation(Http.Client); if (!assetDeliveryInfo.Success) return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" }; @@ -86,12 +86,13 @@ public async Task Setup() /// /// Creates a request to https://assetdelivery.roblox.com/v1/asset/ /// + /// Http client /// Asset Version (0 for latest) /// Http response - public Task AssetRequest(int version = 0) + public Task AssetRequest(HttpClient httpClient, int version = 0) { string url = $"https://assetdelivery.{Config.Default.BaseUrl}/v1/asset/?id={AssetId}&version={version}"; - return Http.Client.GetAsync(url); + return httpClient.GetAsync(url); } /// @@ -128,9 +129,9 @@ public struct AssetDeliveryInformation /// Retrieves information from asset delivery /// /// Asset delivery information - public async Task GetAssetDeliveryInformation() + public async Task GetAssetDeliveryInformation(HttpClient httpClient) { - HttpResponseMessage response = await AssetRequest(); + HttpResponseMessage response = await AssetRequest(httpClient); if (response.StatusCode == HttpStatusCode.Conflict) return new AssetDeliveryInformation { Success = false, Error = "Insufficient permissions to download asset" }; @@ -171,13 +172,14 @@ public async Task GetAssetDeliveryInformation() /// /// Retrieves the CDN url from an asset id /// + /// Http client /// Version (0 for latest) /// Success, Error string, CDN url - public async Task<(bool, string, string)> GetCdnUrl(int version = 0) + public async Task<(bool, string, string)> GetCdnUrl(HttpClient httpClient, int version = 0) { try { - HttpResponseMessage response = await AssetRequest(version); + HttpResponseMessage response = await AssetRequest(httpClient, version); switch (response.StatusCode) { @@ -210,13 +212,14 @@ public async Task GetAssetDeliveryInformation() /// /// Gets content from the CDN using a specified URL /// + /// Http client /// CDN Url /// Success, Error Message, HttpResponseMessage - public async Task<(bool, string, HttpResponseMessage?)> GetCdnContent(string url) + public static async Task<(bool, string, HttpResponseMessage?)> GetCdnContent(HttpClient httpClient, string url) { try { - HttpResponseMessage response = await Http.Client.GetAsync(url); + HttpResponseMessage response = await httpClient.GetAsync(url); switch (response.StatusCode) { @@ -349,6 +352,8 @@ private void FireAssetFailed() // TODO: add try catch blocks. give 3 retries w/ exceptions public async Task StartWorker() { + HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient(); + while (TotalVersions > CurrentVersion) { int version; @@ -361,7 +366,7 @@ public async Task StartWorker() } // get the url - (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCdnUrl(version); + (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCdnUrl(httpClient, version); if (!cdnGetSuccess) { @@ -371,7 +376,7 @@ public async Task StartWorker() } // download the asset - (bool cdnDownloadSuccess, string cdnDownloadMessage, HttpResponseMessage? cdnDownloadResponse) = await GetCdnContent(cdnUrl); + (bool cdnDownloadSuccess, string cdnDownloadMessage, HttpResponseMessage? cdnDownloadResponse) = await GetCdnContent(httpClient, cdnUrl); if (!cdnDownloadSuccess) { @@ -386,6 +391,10 @@ public async Task StartWorker() await LogAssetFromCdnHttpMessageResponse(cdnDownloadResponse, version, cdnUrl); FireAssetSuccess(); } + + // only dispose if it isnt the global http client + if (httpClient != Http.Client) + httpClient.Dispose(); } /// From 2b2f4307a72c07211a93c2fc60bd9b5687e4da09 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:40:55 +0100 Subject: [PATCH 13/26] bump version --- RobloxUltimateScraper/RobloxUltimateScraper.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj index 5d7b5a4..7ca064b 100644 --- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj +++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj @@ -5,7 +5,7 @@ net6.0 enable enable - 0.1.3.0 + 0.1.4.0 false From 729aa887c226a1d2872a98b4d8ef6cd7ad236c24 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:59:52 +0100 Subject: [PATCH 14/26] use .net 8.0 --- RobloxUltimateScraper/RobloxUltimateScraper.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj index 7ca064b..77fcbef 100644 --- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj +++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 enable enable 0.1.4.0 From 97c654402a8585e35b08c6b67d4fff556acacbe6 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:00:18 +0100 Subject: [PATCH 15/26] expanded trimcdnurl --- RobloxUltimateScraper/CommandLineConfig.cs | 7 ++- RobloxUltimateScraper/Config.cs | 4 +- RobloxUltimateScraper/Enums/OutputType.cs | 2 +- RobloxUltimateScraper/Enums/TrimCdnUrlType.cs | 51 +++++++++++++++++++ RobloxUltimateScraper/Scraper.cs | 4 +- 5 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 RobloxUltimateScraper/Enums/TrimCdnUrlType.cs diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index 3aaa156..fd05c43 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -192,8 +192,11 @@ public string BaseUrl } } - [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")] - public bool? TrimCdnUrlInConsole { get; set; } + /// + /// Decides where CDN url parameters should be trimmed. + /// + [Option("trim", Required = false, Default = TrimCdnUrlType.All, HelpText = "Decides where CDN url parameters should be trimmed. (Off, Console, Output, All)")] + public TrimCdnUrlType TrimCdnUrl { get; set; } = TrimCdnUrlType.All; /// /// Disables checks responsible for checking if the current run is authenticated. diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index 45a9ece..7ea58e5 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -33,7 +33,8 @@ internal class Config public string BaseUrl => _clConfig.BaseUrl; - public bool TrimCdnUrlInConsole { get; } + /// + public TrimCdnUrlType TrimCdnUrl => _clConfig.TrimCdnUrl; /// public bool DisableRobloxAuthChecks => _clConfig.DisableRobloxAuthChecks; @@ -49,7 +50,6 @@ public Config(CommandLineConfig config) _clConfig = config; CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg; - TrimCdnUrlInConsole = _clConfig.TrimCdnUrlInConsole ?? OutputType != OutputType.Console; SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient; } diff --git a/RobloxUltimateScraper/Enums/OutputType.cs b/RobloxUltimateScraper/Enums/OutputType.cs index b3167f1..3cad91a 100644 --- a/RobloxUltimateScraper/Enums/OutputType.cs +++ b/RobloxUltimateScraper/Enums/OutputType.cs @@ -22,7 +22,7 @@ internal enum OutputType IndexOnly = 1, /// - /// Console output + /// Console output only /// Console = 2, diff --git a/RobloxUltimateScraper/Enums/TrimCdnUrlType.cs b/RobloxUltimateScraper/Enums/TrimCdnUrlType.cs new file mode 100644 index 0000000..2930a89 --- /dev/null +++ b/RobloxUltimateScraper/Enums/TrimCdnUrlType.cs @@ -0,0 +1,51 @@ +using System.Diagnostics; + +namespace RobloxUltimateScraper.Enums +{ + internal enum TrimCdnUrlType + { + /// + /// Turn off CDN url trimming entirely + /// + Off, + + /// + /// Should trim the CDN url for Console only + /// + Console, + + /// + /// Should trim the CDN url for Output only + /// + Output, + + /// + /// Should trim the CDN url for both Console and Output + /// + All + } + + internal static class TrimCdnUrlTypeEx + { + public static bool ShouldTrim(this TrimCdnUrlType type, OutputType outputType) + { + if (type == TrimCdnUrlType.Off) + return false; + if (type == TrimCdnUrlType.All) + return true; + + switch (outputType) + { + case OutputType.Console: + return type == TrimCdnUrlType.Console; + + case OutputType.Index: + return type == TrimCdnUrlType.Output; + + default: + Debug.Assert(false); + return false; + } + } + } +} diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 0a964bd..81e7831 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -281,7 +281,7 @@ private void LogAsset( Error = error }; - Console.WriteLine(output.ToString(trimCdnUrl: Config.Default.TrimCdnUrlInConsole)); + Console.WriteLine(output.ToString(trimCdnUrl: Config.Default.TrimCdnUrl.ShouldTrim(OutputType.Console))); _index.Add(output); } @@ -430,7 +430,7 @@ public void WriteIndexFile() builder.AppendLine($"{AssetId} asset versions on {DateTime.Now.ToString("R")} ({TotalVersions} versions)"); foreach (AssetOutput asset in _index) - builder.AppendLine(asset.ToString()); + builder.AppendLine(asset.ToString(trimCdnUrl: Config.Default.TrimCdnUrl.ShouldTrim(OutputType.Index))); string contents = builder.ToString(); From 90da67d245331abb0a87809521bd8b3ac5b36395 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:00:24 +0100 Subject: [PATCH 16/26] Revert "bump version" This reverts commit 2b2f4307a72c07211a93c2fc60bd9b5687e4da09. --- RobloxUltimateScraper/RobloxUltimateScraper.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj index 77fcbef..24f9b5c 100644 --- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj +++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj @@ -5,7 +5,7 @@ net8.0 enable enable - 0.1.4.0 + 0.1.3.0 false From ae29b69482342ebbfe46dde351ca0c0dce48401a Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:01:44 +0100 Subject: [PATCH 17/26] Update ci.yml --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 68ca551..b03e770 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: submodules: true - uses: actions/setup-dotnet@v4 with: - dotnet-version: '6.x' + dotnet-version: '8.x' - name: Restore dependencies run: dotnet restore - name: Build @@ -32,4 +32,4 @@ jobs: with: name: RobloxUltimateScraper (${{ matrix.configuration }}, ${{ matrix.platform }}) path: | - ./RobloxUltimateScraper/bin/${{ matrix.configuration }}/net6.0/${{ matrix.platform }}/publish/* \ No newline at end of file + ./RobloxUltimateScraper/bin/${{ matrix.configuration }}/net8.0/${{ matrix.platform }}/publish/* \ No newline at end of file From 92e0a6f2156af55515ab996465c2540d4edc8f2d Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:07:12 +0100 Subject: [PATCH 18/26] remove "WIP!" from range argument documentation --- RobloxUltimateScraper/CommandLineConfig.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index fd05c43..67b334b 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -83,7 +83,7 @@ public string UseListVersionsScraper /// Use the asset range scraper. /// COMMAND LINE USE ONLY! /// - [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID]. WIP!")] + [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID].")] public string UseRangeScraper { set From 5dbbe4373eb8b5af92e6bfd60208cf658d262588 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 19:24:46 +0100 Subject: [PATCH 19/26] allow workers to be started with a specific httpclient should result in less httpclients being created overall with the range scraper now --- RobloxUltimateScraper/Program.cs | 2 +- RobloxUltimateScraper/RangeScraper.cs | 22 +++++++++++++++++++--- RobloxUltimateScraper/Scraper.cs | 26 ++++++++++++++++++-------- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index b376ed7..ba69648 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -181,7 +181,7 @@ static void RunAssetScraper() outputDirectory = Config.Default.OutputDirectory; Scraper scraper = new Scraper(assetId, outputDirectory); - scraper.Setup().Wait(); + scraper.Setup(Http.Client).Wait(); Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!"); diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs index 673c048..a4f8d91 100644 --- a/RobloxUltimateScraper/RangeScraper.cs +++ b/RobloxUltimateScraper/RangeScraper.cs @@ -34,7 +34,7 @@ public RangeScraper(ulong startRange, ulong endRange, string outputDirectory) OutputDirectory = outputDirectory; } - public async Task StartWorker() + public async Task StartWorker(HttpClient httpClient) { while (EndRange > CurrentId) { @@ -56,7 +56,7 @@ public async Task StartWorker() } Scraper scraper = new Scraper(id, outputDirectory); - var result = await scraper.Setup(); + var result = await scraper.Setup(httpClient); if (!result.Success) { Console.WriteLine($"Failed to download {id} ({result.Message})"); @@ -66,7 +66,7 @@ public async Task StartWorker() OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions); scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success); - await scraper.StartWorker(); + await scraper.StartWorker(httpClient); scraper.PrintDownloadStatistics(); scraper.WriteIndexFile(); @@ -75,5 +75,21 @@ public async Task StartWorker() OnAssetFinished?.Invoke(); } } + + public async Task StartWorker() + { + HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient(); + + try + { + await StartWorker(httpClient); + } + finally + { + // only dispose if it isnt the global http client + if (httpClient != Http.Client) + httpClient.Dispose(); + } + } } } diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 81e7831..92f6fd5 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -69,9 +69,9 @@ public struct SetupResult public string Message; } - public async Task Setup() + public async Task Setup(HttpClient httpClient) { - var assetDeliveryInfo = await GetAssetDeliveryInformation(Http.Client); + var assetDeliveryInfo = await GetAssetDeliveryInformation(httpClient); if (!assetDeliveryInfo.Success) return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" }; @@ -350,10 +350,8 @@ private void FireAssetFailed() /// /// Worker // TODO: add try catch blocks. give 3 retries w/ exceptions - public async Task StartWorker() + public async Task StartWorker(HttpClient httpClient) { - HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient(); - while (TotalVersions > CurrentVersion) { int version; @@ -391,10 +389,22 @@ public async Task StartWorker() await LogAssetFromCdnHttpMessageResponse(cdnDownloadResponse, version, cdnUrl); FireAssetSuccess(); } + } - // only dispose if it isnt the global http client - if (httpClient != Http.Client) - httpClient.Dispose(); + public async Task StartWorker() + { + HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient(); + + try + { + await StartWorker(httpClient); + } + finally + { + // only dispose if it isnt the global http client + if (httpClient != Http.Client) + httpClient.Dispose(); + } } /// From 64fdea2f6cd37fe36762f71654e5a91600ddd558 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Tue, 7 Oct 2025 19:37:29 +0100 Subject: [PATCH 20/26] stop automatically compressing images when using range scraper --- RobloxUltimateScraper/CommandLineConfig.cs | 9 ++++++++ RobloxUltimateScraper/Config.cs | 3 +++ RobloxUltimateScraper/FileWriter.cs | 4 ++-- RobloxUltimateScraper/RangeScraper.cs | 9 +++++++- RobloxUltimateScraper/Scraper.cs | 26 +++++++++++++++++++++- 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index 67b334b..6cd4648 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -210,11 +210,20 @@ public string BaseUrl [Option("failifunauthenticated", Required = false, Default = false, HelpText = "Should fail if the current run is unauthenticated?")] public bool FailIfUnauthenticated { get; set; } = false; + /// + /// Should only use a single and shared HTTP client for all scraper threads. + /// [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.3.0 and before. Other name: --shc.")] public bool SingleHttpClient { get; set; } = false; // hack... again!!! [Option("shc", Required = false, Hidden = true)] public bool? SingleHttpClientOtherName { get; set; } + + /// + /// Should images gathered using the range scraper be compressed? + /// + [Option("compressimages", Required = false, Default = false, HelpText = "Should images gathered using the range scraper be compressed?")] + public bool CompressImages { get; set; } = false; } } diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index 7ea58e5..3772496 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -45,6 +45,9 @@ internal class Config /// public bool SingleHttpClient { get; } + /// + public bool CompressImages => _clConfig.CompressImages; + public Config(CommandLineConfig config) { _clConfig = config; diff --git a/RobloxUltimateScraper/FileWriter.cs b/RobloxUltimateScraper/FileWriter.cs index 5ebcb09..dbb0e20 100644 --- a/RobloxUltimateScraper/FileWriter.cs +++ b/RobloxUltimateScraper/FileWriter.cs @@ -23,11 +23,11 @@ public static string BuildOutputFileName(string fileName, string? fileExtension) /// File path /// Stream /// Last modified - public static void Save(string filePath, Stream stream, int compressionLevel, DateTime? lastModified = null) + public static void Save(string filePath, Stream stream, CompressionType compressionType, int compressionLevel, DateTime? lastModified = null) { using (MemoryStream ms = new MemoryStream()) { - switch (Config.Default.CompressionType) + switch (compressionType) { case CompressionType.GZip: ICSharpCode.SharpZipLib.GZip.GZip.Compress(stream, ms, false); diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs index a4f8d91..a34d0b7 100644 --- a/RobloxUltimateScraper/RangeScraper.cs +++ b/RobloxUltimateScraper/RangeScraper.cs @@ -1,4 +1,6 @@ -namespace RobloxUltimateScraper +using RobloxUltimateScraper.Enums; + +namespace RobloxUltimateScraper { /// /// Range scraper @@ -66,6 +68,11 @@ public async Task StartWorker(HttpClient httpClient) OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions); scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success); + + // do not bother compressing images gathered using this scraper (unless specified otherwise by the config) + if (scraper.AssetType == AssetType.Image && !Config.Default.CompressImages) + scraper.ShouldCompress = false; + await scraper.StartWorker(httpClient); scraper.PrintDownloadStatistics(); diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 92f6fd5..1d42e80 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -19,12 +19,29 @@ internal class Scraper private object _lock = new object(); + /// + /// Asset ID being downloaded + /// public ulong AssetId { get; } + /// + /// Asset type of the asset + /// + public AssetType AssetType { get; private set; } = AssetType.Unknown; + + /// + /// Total versions present in this asset + /// public int TotalVersions { get; private set; } + /// + /// Most recent version being downloaded + /// public int CurrentVersion { get; private set; } + /// + /// Directory where to output the files to + /// public string OutputDirectory { get; } /// @@ -49,6 +66,11 @@ internal class Scraper /// public event DownloadFinished? OnDownloadFinished; + /// + /// Should compress the files downloaded? + /// + public bool ShouldCompress { get; set; } = true; + /// /// Index entries /// @@ -75,6 +97,7 @@ public async Task Setup(HttpClient httpClient) if (!assetDeliveryInfo.Success) return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" }; + AssetType = assetDeliveryInfo.AssetType; TotalVersions = assetDeliveryInfo.TotalVersions; FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension; @@ -315,7 +338,8 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon DateTime? lastModifiedDT = lastModified != null ? DateTime.Parse(lastModified) : null; - FileWriter.Save(outputPath, stream, Config.Default.CompressionLevel, lastModifiedDT); + CompressionType compressionType = ShouldCompress ? Config.Default.CompressionType : CompressionType.None; + FileWriter.Save(outputPath, stream, compressionType, Config.Default.CompressionLevel, lastModifiedDT); } } From 87dd5ef4e618974d02837d46c26f3d895879bf7b Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:35:58 +0100 Subject: [PATCH 21/26] allow file structure to be changed for range scraper --- RobloxUltimateScraper/CommandLineConfig.cs | 12 +++- RobloxUltimateScraper/Config.cs | 6 +- .../Enums/MultipleDownloadStructureType.cs | 20 ++++++ .../Properties/launchSettings.json | 2 +- RobloxUltimateScraper/RangeScraper.cs | 9 +-- RobloxUltimateScraper/Scraper.cs | 71 +++++++++++++++++-- 6 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index 6cd4648..3429fb7 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -213,7 +213,7 @@ public string BaseUrl /// /// Should only use a single and shared HTTP client for all scraper threads. /// - [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.3.0 and before. Other name: --shc.")] + [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.2.0 and before. Other name: --shc.")] public bool SingleHttpClient { get; set; } = false; // hack... again!!! @@ -225,5 +225,15 @@ public string BaseUrl /// [Option("compressimages", Required = false, Default = false, HelpText = "Should images gathered using the range scraper be compressed?")] public bool CompressImages { get; set; } = false; + + /// + /// The file structure to use for range & list scrapes. This will not apply to single asset downloads. + /// + [Option("multipledownloadstructure", Required = false, Default = MultipleDownloadStructureType.Default, HelpText = "The file structure to use for range & list scrapes. This will not apply to single asset downloads. Other name: --mds.")] + public MultipleDownloadStructureType MultipleDownloadStructure { get; set; } = MultipleDownloadStructureType.Default; + + // hack... again!!! + [Option("mds", Required = false, Hidden = true)] + public MultipleDownloadStructureType? MultipleDownloadStructureOtherName { get; set; } } } diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index 3772496..da269ed 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -6,7 +6,7 @@ internal class Config { public static Config Default { get; private set; } = null!; - private CommandLineConfig _clConfig; + private readonly CommandLineConfig _clConfig; public ulong ScraperAssetId => _clConfig.ScraperAssetId; @@ -48,12 +48,16 @@ internal class Config /// public bool CompressImages => _clConfig.CompressImages; + /// + public MultipleDownloadStructureType MultipleDownloadStructure { get; } + public Config(CommandLineConfig config) { _clConfig = config; CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg; SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient; + MultipleDownloadStructure = _clConfig.MultipleDownloadStructureOtherName ?? _clConfig.MultipleDownloadStructure; } public static void Initialise(CommandLineConfig commandLineConfig) diff --git a/RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs b/RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs new file mode 100644 index 0000000..7282f99 --- /dev/null +++ b/RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs @@ -0,0 +1,20 @@ +namespace RobloxUltimateScraper.Enums +{ + internal enum MultipleDownloadStructureType + { + /// + /// All asset downloads are put in their own directories + /// + Default, + + /// + /// All asset download files and index files are in the same directories + /// + Combined, + + /// + /// Asset download files and index files are put into their respective directories + /// + Separated + } +} diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json index c1cf511..7620829 100644 --- a/RobloxUltimateScraper/Properties/launchSettings.json +++ b/RobloxUltimateScraper/Properties/launchSettings.json @@ -9,7 +9,7 @@ }, "RobloxUltimateScraper - Range Scraper": { "commandName": "Project", - "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All" + "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All\r\n--mds Separated" } } } \ No newline at end of file diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs index a34d0b7..eea9a5f 100644 --- a/RobloxUltimateScraper/RangeScraper.cs +++ b/RobloxUltimateScraper/RangeScraper.cs @@ -49,15 +49,16 @@ public async Task StartWorker(HttpClient httpClient) id = CurrentId; } - string outputDirectory = Path.Combine(OutputDirectory, $"Asset_{id}"); - if (Directory.Exists(outputDirectory) && File.Exists(Path.Combine(outputDirectory, "index.txt"))) // index.txt is an indication that the download was finished. does not work for non-index runs. + Scraper scraper = new Scraper(id, OutputDirectory); + scraper.SetupMultipleDownloadStructure(Config.Default.MultipleDownloadStructure, $"Asset_{id}"); + + if (File.Exists(scraper.GetIndexFilePath())) // index.txt is an indication that the download was finished. does not work for non-index runs. { - Console.WriteLine($"Skipping {id} - already done. Delete the directory to redo the download."); + Console.WriteLine($"Skipping {id} - already done. Delete the file/directory to redo the download."); OnAssetFinished?.Invoke(); continue; } - Scraper scraper = new Scraper(id, outputDirectory); var result = await scraper.Setup(httpClient); if (!result.Success) { diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 1d42e80..6afdb4c 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -17,7 +17,7 @@ internal class Scraper /// public delegate void DownloadFinished(bool success); - private object _lock = new object(); + private readonly object _lock = new object(); /// /// Asset ID being downloaded @@ -42,7 +42,17 @@ internal class Scraper /// /// Directory where to output the files to /// - public string OutputDirectory { get; } + public string OutputDirectory { get; set; } + + /// + /// Directory to output index files to. This value is prioritised over + /// + public string? IndexOutputDirectory { get; set; } + + /// + /// Directory to output asset files to. This value is prioritised over + /// + public string? FilesOutputDirectory { get; set; } /// /// File extension to be used for saving @@ -76,6 +86,11 @@ internal class Scraper /// private List _index = new List(); + /// + /// Name of the output index files + /// + private string _indexName = "index"; + /// /// Initialises values used by /// @@ -91,6 +106,31 @@ public struct SetupResult public string Message; } + /// + /// Sets up the directory paths further for multiple download scrapers. + /// + /// Name of the child directory if needed + /// Directory structure type + public void SetupMultipleDownloadStructure(MultipleDownloadStructureType type, string childDirectoryName) + { + switch (type) + { + case MultipleDownloadStructureType.Default: + OutputDirectory = Path.Combine(OutputDirectory, childDirectoryName); + break; + + case MultipleDownloadStructureType.Combined: + _indexName = $"{AssetId}_index"; + break; + + case MultipleDownloadStructureType.Separated: + _indexName = $"{AssetId}_index"; + IndexOutputDirectory = Path.Combine(OutputDirectory, "Indexes"); + FilesOutputDirectory = Path.Combine(OutputDirectory, "Files"); + break; + } + } + public async Task Setup(HttpClient httpClient) { var assetDeliveryInfo = await GetAssetDeliveryInformation(httpClient); @@ -101,8 +141,16 @@ public async Task Setup(HttpClient httpClient) TotalVersions = assetDeliveryInfo.TotalVersions; FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension; + + // create all the directories we need Directory.CreateDirectory(OutputDirectory); + if (!string.IsNullOrEmpty(IndexOutputDirectory)) + Directory.CreateDirectory(IndexOutputDirectory); + + if (!string.IsNullOrEmpty(FilesOutputDirectory)) + Directory.CreateDirectory(FilesOutputDirectory); + return new SetupResult { Success = true }; } @@ -319,6 +367,8 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon int version, string cdnUrl) { + string outputDir = FilesOutputDirectory ?? OutputDirectory; + // get last modified string? lastModified = null; if (response.Content.Headers.TryGetValues("last-modified", out IEnumerable? lastModifiedValues)) @@ -333,7 +383,7 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon if (Config.Default.OutputType.IsFileSavingEnabled()) { string outputName = BuildAssetOutputFileName(version); - string path = Path.Combine(OutputDirectory, outputName); + string path = Path.Combine(outputDir, outputName); string outputPath = FileWriter.BuildOutputFileName(path, FileExtension); DateTime? lastModifiedDT = lastModified != null ? DateTime.Parse(lastModified) : null; @@ -449,7 +499,9 @@ public void WriteIndexFile() if (!Config.Default.OutputType.IsIndexEnabled()) return; - Directory.CreateDirectory(OutputDirectory); + string outputDir = IndexOutputDirectory ?? OutputDirectory; + + Directory.CreateDirectory(outputDir); // sort index values _index.Sort(); @@ -468,7 +520,7 @@ public void WriteIndexFile() string contents = builder.ToString(); - string path = Path.Combine(OutputDirectory, "index.txt"); + string path = Path.Combine(outputDir, $"{_indexName}.txt"); indexPaths.Add(path); File.WriteAllText(path, contents); @@ -478,7 +530,7 @@ public void WriteIndexFile() { string contents = JsonSerializer.Serialize(_index); - string path = Path.Combine(OutputDirectory, "index.json"); + string path = Path.Combine(outputDir, $"{_indexName}.json"); indexPaths.Add(path); File.WriteAllText(path, contents); @@ -487,5 +539,12 @@ public void WriteIndexFile() // write information about index Console.WriteLine($"Index file(s) can be found at {string.Join(", ", indexPaths)}"); } + + /// + /// Gets the path of the index file + /// + /// Get the path of the JSON variation of the index + /// Index file path + public string GetIndexFilePath(bool json = false) => Path.Combine(IndexOutputDirectory ?? OutputDirectory, $"{_indexName}.{(json ? "json" : "txt")}"); } } From 3d207a4db3cb2617f16e15004c2c7c661964e037 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:46:53 +0100 Subject: [PATCH 22/26] handle recent assetdelivery api changes --- RobloxUltimateScraper/Scraper.cs | 37 ++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index 6afdb4c..a23a230 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -187,6 +187,29 @@ public static bool IsSuccessStatusCode(HttpStatusCode code, bool allowForbidden } } + /// + /// Gets the status code from an assetdelivery response. + /// This function will use the field and the to deduce the correct status code. + /// + /// Response message from assetdelivery + /// Status code + private static HttpStatusCode GetAssetDeliveryStatusCode(HttpResponseMessage responseMessage) + { + switch (responseMessage.StatusCode) + { + case HttpStatusCode.Forbidden: + // roblox updated assetdelivery to return 403 for copylocked assets + // originally, it returned a 409 + // this breaks a bunch of existing logic, so we have to check the body aswell + string responseContent = responseMessage.Content.ReadAsStringAsync().Result; + bool isConflict = responseContent.Contains("User is not authorized to access Asset."); // this should suffice for now + return isConflict ? HttpStatusCode.Conflict : HttpStatusCode.Forbidden; + + default: + return responseMessage.StatusCode; + } + } + public struct AssetDeliveryInformation { public bool Success; @@ -203,12 +226,13 @@ public struct AssetDeliveryInformation public async Task GetAssetDeliveryInformation(HttpClient httpClient) { HttpResponseMessage response = await AssetRequest(httpClient); + HttpStatusCode statusCode = GetAssetDeliveryStatusCode(response); - if (response.StatusCode == HttpStatusCode.Conflict) + if (statusCode == HttpStatusCode.Conflict) return new AssetDeliveryInformation { Success = false, Error = "Insufficient permissions to download asset" }; - if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download - return new AssetDeliveryInformation { Success = false, Error = $"Unhandled status code ({(int)response.StatusCode})" }; + if (!IsSuccessStatusCode(statusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download + return new AssetDeliveryInformation { Success = false, Error = $"Unhandled status code ({(int)statusCode})" }; IEnumerable? values; int versions; @@ -251,8 +275,9 @@ public async Task GetAssetDeliveryInformation(HttpClie try { HttpResponseMessage response = await AssetRequest(httpClient, version); + HttpStatusCode statusCode = GetAssetDeliveryStatusCode(response); - switch (response.StatusCode) + switch (statusCode) { case HttpStatusCode.Conflict: return (false, "Insufficient permissions to download asset", ""); @@ -264,8 +289,8 @@ public async Task GetAssetDeliveryInformation(HttpClie return (false, "Too many requests", ""); } - if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download - return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", ""); + if (!IsSuccessStatusCode(statusCode)) + return (false, $"Unhandled status code ({(int)statusCode}) ({await response.Content.ReadAsStringAsync()})", ""); if (!response.Headers.TryGetValues("Location", out IEnumerable? values)) return (false, "Location header is missing", ""); // this should never happen, but handle anyways From 5cc049062fe8140c19e58f20e9fd7b0499a55b4a Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:49:37 +0100 Subject: [PATCH 23/26] exit early in single asset download for setup errors --- RobloxUltimateScraper/Program.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index ba69648..ad06f3b 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -181,7 +181,12 @@ static void RunAssetScraper() outputDirectory = Config.Default.OutputDirectory; Scraper scraper = new Scraper(assetId, outputDirectory); - scraper.Setup(Http.Client).Wait(); + var setupResult = scraper.Setup(Http.Client).Result; + if (!setupResult.Success) + { + Console.WriteLine(setupResult.Message); + return; + } Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!"); From 2a44ecf59281b453b88c9eb6d1d6c9ed859660a0 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:58:31 +0100 Subject: [PATCH 24/26] asset type whitelist --- RobloxUltimateScraper/CommandLineConfig.cs | 10 ++++++++++ RobloxUltimateScraper/Config.cs | 7 +++++++ RobloxUltimateScraper/Program.cs | 6 ++++++ RobloxUltimateScraper/Properties/launchSettings.json | 2 +- RobloxUltimateScraper/RangeScraper.cs | 8 ++++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs index 3429fb7..c2748e2 100644 --- a/RobloxUltimateScraper/CommandLineConfig.cs +++ b/RobloxUltimateScraper/CommandLineConfig.cs @@ -235,5 +235,15 @@ public string BaseUrl // hack... again!!! [Option("mds", Required = false, Hidden = true)] public MultipleDownloadStructureType? MultipleDownloadStructureOtherName { get; set; } + + /// + /// Only download assets with the given asset type. + /// + [Option("expectedassettype", Required = false, Default = null, HelpText = "Only download assets with the given asset type. Other name: --eat")] + public AssetType? ExpectedAssetType { get; set; } + + // hack... again!!! + [Option("eat", Required = false, Hidden = true)] + public AssetType? ExpectedAssetTypeOtherName { get; set; } } } diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs index da269ed..8dba540 100644 --- a/RobloxUltimateScraper/Config.cs +++ b/RobloxUltimateScraper/Config.cs @@ -51,6 +51,9 @@ internal class Config /// public MultipleDownloadStructureType MultipleDownloadStructure { get; } + /// + public AssetType? ExpectedAssetType { get; } + public Config(CommandLineConfig config) { _clConfig = config; @@ -58,6 +61,10 @@ public Config(CommandLineConfig config) CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg; SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient; MultipleDownloadStructure = _clConfig.MultipleDownloadStructureOtherName ?? _clConfig.MultipleDownloadStructure; + ExpectedAssetType = _clConfig.ExpectedAssetTypeOtherName ?? _clConfig.ExpectedAssetType; + + if (ExpectedAssetType == AssetType.Unknown) + throw new ApplicationException("Invalid value for ExpectedAssetType"); } public static void Initialise(CommandLineConfig commandLineConfig) diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index ad06f3b..e0948f5 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -188,6 +188,12 @@ static void RunAssetScraper() return; } + if (Config.Default.ExpectedAssetType.HasValue && scraper.AssetType != Config.Default.ExpectedAssetType.Value) + { + Console.WriteLine($"Asset {assetId}'s type is not whitelisted (expected {Config.Default.ExpectedAssetType})"); + return; + } + Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!"); // set up titles diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json index 7620829..ec5a221 100644 --- a/RobloxUltimateScraper/Properties/launchSettings.json +++ b/RobloxUltimateScraper/Properties/launchSettings.json @@ -9,7 +9,7 @@ }, "RobloxUltimateScraper - Range Scraper": { "commandName": "Project", - "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All\r\n--mds Separated" + "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All\r\n--mds Separated\r\n--eat Image" } } } \ No newline at end of file diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs index eea9a5f..27f5cb0 100644 --- a/RobloxUltimateScraper/RangeScraper.cs +++ b/RobloxUltimateScraper/RangeScraper.cs @@ -66,6 +66,14 @@ public async Task StartWorker(HttpClient httpClient) OnAssetError?.Invoke(); continue; } + + if (Config.Default.ExpectedAssetType.HasValue && scraper.AssetType != Config.Default.ExpectedAssetType.Value) + { + Console.WriteLine($"Asset {id}'s type is not whitelisted (expected {Config.Default.ExpectedAssetType})"); + OnAssetFinished?.Invoke(); + continue; + } + OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions); scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success); From a2e8fe4fc69ecbef035cbeb80988d792ada4d498 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sat, 11 Oct 2025 22:11:45 +0100 Subject: [PATCH 25/26] fix title bar version counter using id count instead of version count --- RobloxUltimateScraper/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index e0948f5..e6d4875 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -233,7 +233,7 @@ static async Task RangeScraperTitleLogic(RangeScraperData data, CancellationToke { Console.Title = $"RobloxUltimateScraper | Range {data.StartRange}-{data.EndRange} | " + $"{data.DownloadedIds}/{data.TotalIds} IDs ({data.ErrorIds} errors) | " + - $"{data.DownloadedIds}/{data.TotalVersions} Versions ({data.ErrorVersions} errors)"; + $"{data.DownloadedVersions}/{data.TotalVersions} Versions ({data.ErrorVersions} errors)"; try { From c91348c2472655ea1840ffe0eb3161457ac0ded7 Mon Sep 17 00:00:00 2001 From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com> Date: Sat, 11 Oct 2025 22:14:33 +0100 Subject: [PATCH 26/26] fix empty directories being created with range scraper --- RobloxUltimateScraper/Program.cs | 2 ++ RobloxUltimateScraper/RangeScraper.cs | 1 + RobloxUltimateScraper/Scraper.cs | 10 ++++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs index e6d4875..e5eb149 100644 --- a/RobloxUltimateScraper/Program.cs +++ b/RobloxUltimateScraper/Program.cs @@ -194,6 +194,8 @@ static void RunAssetScraper() return; } + scraper.CreateOutputDirectories(); + Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!"); // set up titles diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs index 27f5cb0..6d86ca9 100644 --- a/RobloxUltimateScraper/RangeScraper.cs +++ b/RobloxUltimateScraper/RangeScraper.cs @@ -74,6 +74,7 @@ public async Task StartWorker(HttpClient httpClient) continue; } + scraper.CreateOutputDirectories(); OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions); scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success); diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs index a23a230..8aa269d 100644 --- a/RobloxUltimateScraper/Scraper.cs +++ b/RobloxUltimateScraper/Scraper.cs @@ -142,6 +142,14 @@ public async Task Setup(HttpClient httpClient) FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension; + return new SetupResult { Success = true }; + } + + /// + /// Creates all the necessary output directories + /// + public void CreateOutputDirectories() + { // create all the directories we need Directory.CreateDirectory(OutputDirectory); @@ -150,8 +158,6 @@ public async Task Setup(HttpClient httpClient) if (!string.IsNullOrEmpty(FilesOutputDirectory)) Directory.CreateDirectory(FilesOutputDirectory); - - return new SetupResult { Success = true }; } ///