From 54edb74166b12543ad819e25c35ed4920b78b8c2 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sun, 2 Feb 2025 19:44:17 +0000
Subject: [PATCH 01/26] refactor codebase
---
RobloxUltimateScraper/CommandLineConfig.cs | 198 +++++++++++
RobloxUltimateScraper/Config.cs | 319 ++----------------
.../Enums/CompressionType.cs | 28 ++
RobloxUltimateScraper/Enums/IndexType.cs | 23 ++
RobloxUltimateScraper/Enums/OutputType.cs | 40 +++
RobloxUltimateScraper/Enums/ScraperType.cs | 33 ++
RobloxUltimateScraper/FileWriter.cs | 7 +-
RobloxUltimateScraper/Http.cs | 47 +++
RobloxUltimateScraper/Models/AssetInput.cs | 14 -
RobloxUltimateScraper/Models/AssetOutput.cs | 20 +-
RobloxUltimateScraper/Program.cs | 68 ++--
RobloxUltimateScraper/Scraper.cs | 213 +++++-------
12 files changed, 501 insertions(+), 509 deletions(-)
create mode 100644 RobloxUltimateScraper/CommandLineConfig.cs
create mode 100644 RobloxUltimateScraper/Enums/CompressionType.cs
create mode 100644 RobloxUltimateScraper/Enums/IndexType.cs
create mode 100644 RobloxUltimateScraper/Enums/OutputType.cs
create mode 100644 RobloxUltimateScraper/Enums/ScraperType.cs
create mode 100644 RobloxUltimateScraper/Http.cs
delete mode 100644 RobloxUltimateScraper/Models/AssetInput.cs
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
new file mode 100644
index 0000000..e53ba6a
--- /dev/null
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -0,0 +1,198 @@
+using CommandLine;
+using RobloxUltimateScraper.Enums;
+
+namespace RobloxUltimateScraper
+{
+ /// <summary>
+ /// Scraper configuration as parsed from the command line.
+ /// </summary>
+ internal class CommandLineConfig
+ {
+ /// <summary>
+ /// Selected scraper type.
+ /// </summary>
+ public ScraperType Scraper { get; set; } = ScraperType.None;
+
+ /// <summary>
+ /// Asset to scrape.
+ /// Should be used with scraper types <see cref="ScraperType.Asset"/>.
+ /// </summary>
+ public ulong ScraperAssetId { get; set; } = 0;
+
+ /// <summary>
+ /// Asset list to scrape.
+ /// Should be used with scraper types <see cref="ScraperType.List"/> and <see cref="ScraperType.ListVersions"/>.
+ /// </summary>
+ public string ScraperListPath { get; set; } = string.Empty;
+
+ /// <summary>
+ /// Asset scrape start range.
+ /// Should be used with scraper types <see cref="ScraperType.Range"/>.
+ /// </summary>
+ public ulong ScraperStartRange { get; set; } = 0;
+
+ /// <summary>
+ /// Asset scrape end range.
+ /// Should be used with scraper types <see cref="ScraperType.Range"/>.
+ /// </summary>
+ public ulong ScraperEndRange { get; set; } = 0;
+
+ /// <summary>
+ /// Use the asset scraper.
+ /// COMMAND LINE USE ONLY!
+ /// </summary>
+ [Option('a', "asset", Required = false, HelpText = "Use the asset scraper. Parameter takes in an ID.")]
+ public ulong UseAssetScraper
+ {
+ set
+ {
+ Scraper = ScraperType.Asset;
+ ScraperAssetId = value;
+ }
+ }
+
+ /// <summary>
+ /// Use the asset list scraper.
+ /// COMMAND LINE USE ONLY!
+ /// </summary>
+ [Option('l', "list", Required = false, HelpText = "Use the asset list scraper. Parameter takes in a list path. WIP!")]
+ public string UseListScraper
+ {
+ set
+ {
+ Scraper = ScraperType.List;
+ ScraperListPath = value;
+ }
+ }
+
+ /// <summary>
+ /// Use the asset list versions scraper.
+ /// COMMAND LINE USE ONLY!
+ /// </summary>
+ [Option("listversions", Required = false, HelpText = "Use the asset list version scraper. Parameter takes in a list path. WIP!")]
+ public string UseListVersionsScraper
+ {
+ set
+ {
+ Scraper = ScraperType.ListVersions;
+ ScraperListPath = value;
+ }
+ }
+
+ /// <summary>
+ /// Use the asset range scraper. The value must look like "[Start ID]-[End ID]"; anything else throws <see cref="ArgumentException"/>.
+ /// COMMAND LINE USE ONLY!
+ /// </summary>
+ [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID]. WIP!")]
+ public string UseRangeScraper
+ {
+ set
+ {
+ Scraper = ScraperType.Range;
+
+ // parse input
+ string[] segments = value.Split('-');
+
+ if (segments.Length != 2)
+ throw new ArgumentException("Parameter is not in valid format.");
+
+ if (!ulong.TryParse(segments[0], out ulong startRange))
+ throw new ArgumentException("Start range is not an integer.");
+
+ if (!ulong.TryParse(segments[1], out ulong endRange))
+ throw new ArgumentException("End range is not an integer.");
+
+ ScraperStartRange = startRange;
+ ScraperEndRange = endRange;
+ }
+ }
+
+ /// <summary>
+ /// Assets output type.
+ /// </summary>
+ [Option('o', "output", Required = false, Default = OutputType.Both, HelpText = "Assets output type. (Files, Index, Console, Both)")]
+ public OutputType OutputType { get; set; } = OutputType.Both;
+
+ /// <summary>
+ /// Index type.
+ /// </summary>
+ [Option('i', "index", Required = false, Default = IndexType.All, HelpText = "Index type. (Text, Json, All)")]
+ public IndexType IndexType { get; set; } = IndexType.All;
+
+ /// <summary>
+ /// Asset compression type.
+ /// </summary>
+ [Option('c', "compression", Required = false, Default = CompressionType.None, HelpText = "Compression type. (None, GZip, Bzip2, Zstd)")]
+ public CompressionType CompressionType { get; set; } = CompressionType.None;
+
+ [Option("compressionlevel", Required = false, Default = 9, HelpText = "Compression level for the compression. Only works for BZip2 (1-9) and Zstd (1-22). Other name: --cl.")]
+ public int CompressionLevelArg { get; set; } = 9; // 9 is good for both BZip2 and Zstd
+
+ // this sucks but commandlineparser has no way to set multiple names for an argument
+ // and short arguments are only allowed to be a single character
+ [Option("cl", Required = false, Hidden = true)]
+ public int? CompressionLevelArgOtherName { get; set; }
+
+ /// <summary>
+ /// Assets output directory.
+ /// </summary>
+ [Option('d', "directory", Required = false, HelpText = "Assets output directory.")]
+ public string OutputDirectory { get; set; } = "";
+
+ /// <summary>
+ /// Assets output extension.
+ /// </summary>
+ [Option('e', "extension", Required = false, Default = "Auto", HelpText = "Assets output extension. A value of 'Auto' will determine the extension based on the asset type.")]
+ public string OutputExtension { get; set; } = "Auto";
+
+ /// <summary>
+ /// Number of scrape workers.
+ /// </summary>
+ [Option('w', "workers", Required = false, Default = 1, HelpText = "Number of scrape workers.")]
+ public int Workers { get; set; } = 1;
+
+ /// <summary>
+ /// Roblox authentication cookie (ROBLOSECURITY).
+ /// For copylocked game scraping.
+ /// </summary>
+ [Option("cookies", Required = false, HelpText = "Roblox authentication cookie (.ROBLOSECURITY). This argument is prioritised over the environment variable 'ROBLOXULTIMATESCRAPER_COOKIE'.")]
+ public string? AuthCookie { get; set; }
+
+ /// <summary>
+ /// Http timeout in seconds.
+ /// </summary>
+ [Option('t', "timeout", Required = false, Default = 180, HelpText = "Http timeout in seconds.")]
+ public int HttpTimeout { get; set; } = 180;
+
+ private string _baseUrl = "roblox.com"; // always held in the normalised form produced by the BaseUrl setter
+
+ /// <summary>
+ /// Roblox environment to download from. Stored as a bare host: the scheme, a leading "www."/"web." and any path are removed.
+ /// </summary>
+ [Option("baseurl", Required = false, Default = "www.roblox.com", HelpText = "Roblox environment to download from.")]
+ public string BaseUrl
+ {
+ get => _baseUrl;
+
+ set
+ {
+ if (value.StartsWith("http://", StringComparison.OrdinalIgnoreCase)) // ordinal + ignore case: "HTTPS://..." must be stripped too
+ value = value[7..];
+ else if (value.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
+ value = value[8..];
+
+ if (value.StartsWith("www.", StringComparison.OrdinalIgnoreCase) || value.StartsWith("web.", StringComparison.OrdinalIgnoreCase))
+ value = value[4..];
+
+ int idx = value.IndexOf('/'); // anything from the first '/' on is a path, not part of the host
+ if (idx != -1)
+ value = value[..idx];
+
+ _baseUrl = value;
+ }
+ }
+
+ [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")]
+ public bool? TrimCdnUrlInConsole { get; set; } // left null here, Config substitutes a default that depends on the output type
+ }
+}
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index 08bab82..6ed8bf9 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -1,317 +1,54 @@
-using CommandLine;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using RobloxUltimateScraper.Enums;
namespace RobloxUltimateScraper
{
- ///
- /// Output type
- ///
- internal enum OutputType
- {
- ///
- /// Asset files
- ///
- Files = 0,
-
- [Obsolete]
- FilesOnly = 0,
-
- ///
- /// Asset index
- ///
- Index = 1,
-
- [Obsolete]
- IndexOnly = 1,
-
- ///
- /// Console output
- ///
- Console = 2,
-
- ///
- /// Asset files and index
- ///
- Both = 3
- }
-
- ///
- /// Compression type on asset files
- ///
- internal enum CompressionType
- {
- ///
- /// No compression
- ///
- None,
-
- ///
- /// GZip compression
- ///
- GZip,
-
- ///
- /// BZip2 compression
- ///
- BZip2,
-
- ///
- /// Zstd compression
- ///
- Zstd
- }
-
- ///
- /// Index type
- ///
- internal enum IndexType
- {
- ///
- /// Text index
- ///
- Text,
-
- ///
- /// Json index
- ///
- Json,
-
- ///
- /// Text and json indexes
- ///
- All
- }
-
- ///
- /// Scraper type
- ///
- internal enum ScraperType
- {
- ///
- /// Asset version scraper
- ///
- Asset,
-
- ///
- /// Asset list scraper
- ///
- List,
-
- ///
- /// Asset list scraper, with versions
- ///
- ListVersions,
-
- ///
- /// Asset range scraper
- ///
- Range
- }
-
- ///
- /// Scraper configuration
- ///
internal class Config
{
- ///
- /// singleton.
- ///
- public static Config Default { get; set; } = default!;
+ public static Config Default { get; private set; } = null!;
- ///
- /// Selected scraper type.
- ///
- public ScraperType? Scraper { get; set; }
+ private CommandLineConfig _clConfig;
- ///
- /// Asset to scrape.
- /// Should be used with scraper types .
- ///
- public long ScraperId { get; set; } = 0;
+ public ulong ScraperAssetId => _clConfig.ScraperAssetId;
- ///
- /// Asset list to scrape.
- /// Should be used with scraper types and .
- ///
- public string ScraperListPath { get; set; } = string.Empty;
+ public string ScraperListPath => _clConfig.ScraperListPath;
- ///
- /// Asset scrape start range.
- /// Should be used with scraper types .
- ///
- public long ScraperStartRange { get; set; } = 0;
+ public ulong ScraperStartRange => _clConfig.ScraperStartRange;
+ public ulong ScraperEndRange => _clConfig.ScraperEndRange;
- ///
- /// Asset scrape end range.
- /// Should be used with scraper types .
- ///
- public long ScraperEndRange { get; set; } = 0;
+ public ScraperType Scraper => _clConfig.Scraper;
+ public OutputType OutputType => _clConfig.OutputType;
+ public IndexType IndexType => _clConfig.IndexType;
- ///
- /// Use the asset scraper.
- /// COMMAND LINE USE ONLY!
- ///
- [Option('a', "asset", Required = false, HelpText = "Use the asset scraper. Parameter takes in an ID.")]
- public long UseAssetScraper
- {
- set
- {
- Scraper = ScraperType.Asset;
- ScraperId = value;
- }
- }
+ public CompressionType CompressionType => _clConfig.CompressionType;
+ public int CompressionLevel { get; }
- ///
- /// Use the asset list scraper.
- /// COMMAND LINE USE ONLY!
- ///
- [Option('l', "list", Required = false, HelpText = "Use the asset list scraper. Parameter takes in a list path. WIP!")]
- public string UseListScraper
- {
- set
- {
- Scraper = ScraperType.List;
- ScraperListPath = value;
- }
- }
+ public string OutputDirectory => _clConfig.OutputDirectory;
+ public string OutputExtension => _clConfig.OutputExtension;
- ///
- /// Use the asset list versions scraper.
- /// COMMAND LINE USE ONLY!
- ///
- [Option("listversions", Required = false, HelpText = "Use the asset list version scraper. Parameter takes in a list path. WIP!")]
- public string UseListVersionsScraper
- {
- set
- {
- Scraper = ScraperType.ListVersions;
- ScraperListPath = value;
- }
- }
+ public int Workers => _clConfig.Workers;
- ///
- /// Use the asset range scraper.
- /// COMMAND LINE USE ONLY!
- ///
- [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID]. WIP!")]
- public string UseRangeScraper
- {
- set
- {
- Scraper = ScraperType.Range;
+ public string? AuthCookie => _clConfig.AuthCookie;
- // parse input
- string[] segments = value.Split('-');
+ public int HttpTimeout => _clConfig.HttpTimeout;
- if (segments.Length != 2)
- throw new ArgumentException("Parameter is not in valid format.");
+ public string BaseUrl => _clConfig.BaseUrl;
- if (!long.TryParse(segments[0], out long startRange))
- throw new ArgumentException("Start range is not an integer.");
+ public bool TrimCdnUrlInConsole { get; }
- if (!long.TryParse(segments[1], out long endRange))
- throw new ArgumentException("End range is not an integer.");
+ public Config(CommandLineConfig config)
+ {
+ _clConfig = config;
- ScraperStartRange = startRange;
- ScraperEndRange = endRange;
- }
+ CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg;
+ TrimCdnUrlInConsole = _clConfig.TrimCdnUrlInConsole ?? OutputType != OutputType.Console;
}
- ///
- /// Assets output type.
- ///
- [Option('o', "output", Required = false, Default = OutputType.Both, HelpText = "Assets output type. (Files, Index, Console, Both)")]
- public OutputType OutputType { get; set; } = OutputType.Both;
-
- ///
- /// Index type.
- ///
- [Option('i', "index", Required = false, Default = IndexType.All, HelpText = "Index type. (Text, Json, All)")]
- public IndexType IndexType { get; set; } = IndexType.All;
-
- ///
- /// Asset compression type.
- ///
- [Option('c', "compression", Required = false, Default = CompressionType.None, HelpText = "Compression type. (None, GZip, Bzip2, Zstd)")]
- public CompressionType CompressionType { get; set; } = CompressionType.None;
-
- [Option("compressionlevel", Required = false, Default = 9, HelpText = "Compression level for the compression. Only works for BZip2 (1-9) and Zstd (1-22). Other name: --cl.")]
- public int CompressionLevelArg { get; set; } = 9; // 9 is good for both BZip2 and Zstd
-
- // this sucks but commandlineparser has no way to set multiple names for an argument
- // and short arguments are only allowed to be a single character
- [Option("cl", Required = false, Hidden = true)]
- public int? CompressionLevelArgOtherName { get; set; }
-
- ///
- /// Asset compression level.
- ///
- public int CompressionLevel { get => CompressionLevelArgOtherName != null ? (int)CompressionLevelArgOtherName : CompressionLevelArg; }
-
- ///
- /// Assets output directory.
- ///
- [Option('d', "directory", Required = false, HelpText = "Assets output directory.")]
- public string OutputDirectory { get; set; } = "";
-
- ///
- /// Assets output extension.
- ///
- [Option('e', "extension", Required = false, Default = "Auto", HelpText = "Assets output extension. A value of 'Auto' will determine the extension based on the asset type.")]
- public string OutputExtension { get; set; } = "Auto";
-
- ///
- /// Number of scrape workers.
- ///
- [Option('w', "workers", Required = false, Default = 1, HelpText = "Number of scrape workers.")]
- public int Workers { get; set; } = 1;
-
- ///
- /// Roblox authentication cookie (ROBLOSECURITY).
- /// For copylocked game scraping.
- ///
- [Option("cookies", Required = false, HelpText = "Roblox authentication cookie (.ROBLOSECURITY). This argument is prioritised over the environment variable 'ROBLOXULTIMATESCRAPER_COOKIE'.")]
- public string? AuthCookie { get; set; }
-
- ///
- /// Http timeout in seconds.
- ///
- [Option('t', "timeout", Required = false, Default = 180, HelpText = "Http timeout in seconds.")]
- public int HttpTimeout { get; set; } = 180;
-
- private string _baseUrl = "roblox.com";
-
- ///
- /// Roblox environment to download from.
- ///
- [Option("baseurl", Required = false, Default = "www.roblox.com", HelpText = "Roblox environment to download from.")]
- public string BaseUrl
+ /// <summary>
+ /// Creates the <see cref="Default"/> instance from the parsed command line. May only be called once.
+ /// </summary>
+ /// <param name="commandLineConfig">Parsed command line options.</param>
+ /// <exception cref="InvalidOperationException"><see cref="Default"/> has already been set.</exception>
+ public static void Initialise(CommandLineConfig commandLineConfig)
{
- get => _baseUrl;
-
- set
- {
- if (value.StartsWith("http://"))
- value = value[7..];
- else if (value.StartsWith("https://"))
- value = value[8..];
+ if (Default != null)
+ throw new InvalidOperationException("Can not initialise Config twice.");
- if (value.StartsWith("www.") || value.StartsWith("web."))
- value = value[4..];
-
- int idx = value.IndexOf('/');
- if (idx != -1)
- value = value[..idx];
-
- _baseUrl = value;
- }
+ Default = new Config(commandLineConfig);
}
-
- [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")]
- public bool? TrimCdnUrlInConsole { get; set; }
}
}
diff --git a/RobloxUltimateScraper/Enums/CompressionType.cs b/RobloxUltimateScraper/Enums/CompressionType.cs
new file mode 100644
index 0000000..8d4e9ef
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/CompressionType.cs
@@ -0,0 +1,28 @@
+namespace RobloxUltimateScraper.Enums
+{
+ /// <summary>
+ /// Compression type on asset files
+ /// </summary>
+ internal enum CompressionType
+ {
+ /// <summary>
+ /// No compression
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// GZip compression
+ /// </summary>
+ GZip,
+
+ /// <summary>
+ /// BZip2 compression
+ /// </summary>
+ BZip2,
+
+ /// <summary>
+ /// Zstd compression
+ /// </summary>
+ Zstd
+ }
+}
diff --git a/RobloxUltimateScraper/Enums/IndexType.cs b/RobloxUltimateScraper/Enums/IndexType.cs
new file mode 100644
index 0000000..7641eb5
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/IndexType.cs
@@ -0,0 +1,23 @@
+namespace RobloxUltimateScraper.Enums
+{
+ /// <summary>
+ /// Index type
+ /// </summary>
+ internal enum IndexType
+ {
+ /// <summary>
+ /// Text index
+ /// </summary>
+ Text,
+
+ /// <summary>
+ /// Json index
+ /// </summary>
+ Json,
+
+ /// <summary>
+ /// Text and json indexes
+ /// </summary>
+ All
+ }
+}
diff --git a/RobloxUltimateScraper/Enums/OutputType.cs b/RobloxUltimateScraper/Enums/OutputType.cs
new file mode 100644
index 0000000..b3167f1
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/OutputType.cs
@@ -0,0 +1,40 @@
+namespace RobloxUltimateScraper.Enums
+{
+ /// <summary>
+ /// Asset output type
+ /// </summary>
+ internal enum OutputType
+ {
+ /// <summary>
+ /// Asset files
+ /// </summary>
+ Files = 0,
+
+ [Obsolete] // legacy alias: same value as Files
+ FilesOnly = 0,
+
+ /// <summary>
+ /// Asset index
+ /// </summary>
+ Index = 1,
+
+ [Obsolete] // legacy alias: same value as Index
+ IndexOnly = 1,
+
+ /// <summary>
+ /// Console output
+ /// </summary>
+ Console = 2,
+
+ /// <summary>
+ /// Asset files and index
+ /// </summary>
+ Both = 3
+ }
+
+ internal static class OutputTypeEx // answers which outputs a given OutputType switches on
+ {
+ public static bool IsFileSavingEnabled(this OutputType type) => type is OutputType.Files or OutputType.Both;
+ public static bool IsIndexEnabled(this OutputType type) => type is OutputType.Index or OutputType.Both;
+ }
+}
diff --git a/RobloxUltimateScraper/Enums/ScraperType.cs b/RobloxUltimateScraper/Enums/ScraperType.cs
new file mode 100644
index 0000000..586a2ba
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/ScraperType.cs
@@ -0,0 +1,33 @@
+namespace RobloxUltimateScraper.Enums
+{
+ /// <summary>
+ /// Scraper type
+ /// </summary>
+ internal enum ScraperType
+ {
+ /// <summary>
+ /// No scraper selected
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Asset version scraper
+ /// </summary>
+ Asset,
+
+ /// <summary>
+ /// Asset list scraper
+ /// </summary>
+ List,
+
+ /// <summary>
+ /// Asset list scraper, with versions
+ /// </summary>
+ ListVersions,
+
+ /// <summary>
+ /// Asset range scraper
+ /// </summary>
+ Range
+ }
+}
diff --git a/RobloxUltimateScraper/FileWriter.cs b/RobloxUltimateScraper/FileWriter.cs
index 4a2882d..5ebcb09 100644
--- a/RobloxUltimateScraper/FileWriter.cs
+++ b/RobloxUltimateScraper/FileWriter.cs
@@ -1,9 +1,4 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Reflection.Emit;
-using System.Text;
-using System.Threading.Tasks;
+using RobloxUltimateScraper.Enums;
namespace RobloxUltimateScraper
{
diff --git a/RobloxUltimateScraper/Http.cs b/RobloxUltimateScraper/Http.cs
new file mode 100644
index 0000000..a81be3d
--- /dev/null
+++ b/RobloxUltimateScraper/Http.cs
@@ -0,0 +1,47 @@
+using System.Net;
+
+namespace RobloxUltimateScraper
+{
+ /// <summary>Owns the single <see cref="HttpClient"/> instance exposed through <see cref="Client"/>.</summary>
+ internal static class Http
+ {
+ /// <summary>Shared client; assigned once by the static constructor.</summary>
+ public static HttpClient Client { get; }
+
+ /// <summary>Configures <see cref="Client"/> from <see cref="Config.Default"/> and, as a fallback, the cookie environment variable.</summary>
+ static Http()
+ {
+ // the command line cookie takes priority over ROBLOXULTIMATESCRAPER_COOKIE
+ string? authCookie = Config.Default.AuthCookie;
+ if (!string.IsNullOrEmpty(authCookie))
+ {
+ Console.WriteLine("Using cookies from arguments.");
+ }
+ else
+ {
+ authCookie = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE");
+ if (!string.IsNullOrEmpty(authCookie))
+ Console.WriteLine("Using cookies from environment variables.");
+ else
+ authCookie = null; // an empty value counts as no cookie
+ }
+
+ // the cookie domain is the base url with a leading dot; requests go to subdomains such as assetdelivery.*
+ CookieContainer jar = new CookieContainer();
+ if (authCookie != null)
+ jar.Add(new Cookie(".ROBLOSECURITY", authCookie, "/", $".{Config.Default.BaseUrl}"));
+
+ Client = new HttpClient(new HttpClientHandler
+ {
+ AutomaticDecompression = DecompressionMethods.All,
+ AllowAutoRedirect = false, // we are using v1 because v2 is bad
+ CookieContainer = jar,
+ UseCookies = true
+ })
+ {
+ Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout)
+ };
+ //_HttpClient.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinINet");
+ }
+ }
+}
diff --git a/RobloxUltimateScraper/Models/AssetInput.cs b/RobloxUltimateScraper/Models/AssetInput.cs
deleted file mode 100644
index 9afd2fe..0000000
--- a/RobloxUltimateScraper/Models/AssetInput.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace RobloxUltimateScraper.Models
-{
- internal class AssetInput
- {
- public long Id { get; set; }
- public int Version { get; set; }
- }
-}
diff --git a/RobloxUltimateScraper/Models/AssetOutput.cs b/RobloxUltimateScraper/Models/AssetOutput.cs
index 1ad4ce4..c555d4f 100644
--- a/RobloxUltimateScraper/Models/AssetOutput.cs
+++ b/RobloxUltimateScraper/Models/AssetOutput.cs
@@ -1,22 +1,10 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace RobloxUltimateScraper.Models
+namespace RobloxUltimateScraper.Models
{
///
/// Asset information for index
///
internal class AssetOutput : IComparable
{
- ///
- /// Asset version
- ///
- public long Id { get; set; }
-
///
/// Asset version
///
@@ -67,7 +55,7 @@ internal class AssetOutput : IComparable
// 1818 | v1 | Error: failed to download
public string ToString(bool trimCdnUrl)
{
- string output = $"{Id} | v{Version}";
+ string output = $"v{Version}";
if (Error != null)
{
@@ -96,10 +84,6 @@ public int CompareTo(AssetOutput? other)
{
if (other == null) return 1;
- // compare asset ids
- if (Id > other.Id) return 1;
- if (Id < other.Id) return -1;
-
// compare versions
if (Version > other.Version) return 1;
if (Version < other.Version) return -1;
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index c9e9d12..b033ba2 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -1,7 +1,6 @@
using CommandLine;
using CommandLine.Text;
using RobloxUltimateScraper.Enums;
-using RobloxUltimateScraper.Models;
using System.Reflection;
namespace RobloxUltimateScraper
@@ -14,9 +13,9 @@ static async Task Main(string[] args)
args = new string[] { "--help" };
Parser cmdLineParser = new Parser(settings => settings.CaseInsensitiveEnumValues = true);
- ParserResult configParser = cmdLineParser.ParseArguments(args);
+ ParserResult configParser = cmdLineParser.ParseArguments(args);
configParser.WithNotParsed(errors => Error(configParser, errors));
- await configParser.WithParsedAsync(async config => await Run(config));
+ configParser.WithParsed(config => Run(config));
}
///
@@ -24,11 +23,11 @@ static async Task Main(string[] args)
///
/// Configuration
///
- static async Task Run(Config config)
+ static void Run(CommandLineConfig config)
{
Console.WriteLine($"RobloxUltimateScraper v{Assembly.GetExecutingAssembly().GetName().Version}");
- Config.Default = config;
+ Config.Initialise(config);
// TODO: add functionality for
// list
@@ -36,13 +35,13 @@ static async Task Run(Config config)
// range
switch (Config.Default.Scraper)
{
- case null:
- Console.WriteLine("Please define which scraper you wish to use!");
+ case ScraperType.None:
+ Console.WriteLine("No scraper chosen.");
Console.WriteLine("Run the scraper with the --help argument for all commands.");
break;
case ScraperType.Asset:
- await RunAssetScraper();
+ RunAssetScraper();
break;
case ScraperType.List:
@@ -67,7 +66,7 @@ static async Task Run(Config config)
/// Handles command line parsing failure
///
/// Errors from command line parser
- static void Error(ParserResult config, IEnumerable errors)
+ static void Error(ParserResult config, IEnumerable errors)
{
HelpText text = HelpText.AutoBuild(config);
Console.WriteLine(text);
@@ -82,63 +81,46 @@ static void Error(ParserResult config, IEnumerable errors)
/// Downloads
/// Errors
/// Versions
- static void SetAssetScraperTitle(long id, int downloaded, int errors, int total)
+ static void SetAssetScraperTitle(ulong id, int downloaded, int errors, int total)
{
- Console.Title = $"{nameof(RobloxUltimateScraper)} | Asset {id} | {downloaded}/{total} | {errors} Errors";
+ Console.Title = $"RobloxUltimateScraper | Asset {id} | {downloaded}/{total} | {errors} Errors";
}
///
/// Starts the asset scraper
///
///
- static async Task RunAssetScraper()
+ static void RunAssetScraper()
{
- long assetId = Config.Default.ScraperId;
+ ulong assetId = Config.Default.ScraperAssetId;
- if (string.IsNullOrEmpty(Config.Default.OutputDirectory) && !Scraper.ConsoleOnly)
- Config.Default.OutputDirectory = $"Asset_{assetId}";
+ string outputDirectory;
- Scraper.ShouldTrimCdnUrlInConsole = Config.Default.TrimCdnUrlInConsole ?? !Scraper.ConsoleOnly;
+ // default to "Asset_<id>" when no directory was given, except for console-only output
+ if (string.IsNullOrEmpty(Config.Default.OutputDirectory) && Config.Default.OutputType != OutputType.Console)
+ outputDirectory = $"Asset_{assetId}";
+ else
+ outputDirectory = Config.Default.OutputDirectory;
- // get all place versions
- var assetDeliveryInfo = await Scraper.GetAssetDeliveryInformation(assetId);
+ Scraper scraper = new Scraper(assetId, outputDirectory);
+
+ // Setup() hands failures back in its result; without this check a failed
+ // version fetch would carry on and report 0 versions
+ Scraper.SetupResult setup = scraper.Setup().GetAwaiter().GetResult();
+ if (!setup.Success)
+ {
+ Console.WriteLine(setup.Message);
+ Environment.Exit(1);
+ }
- if (!assetDeliveryInfo.Success)
- {
- Console.WriteLine($"Failed to fetch versions for asset {assetId}: {assetDeliveryInfo.Error}");
- Environment.Exit(1);
- }
-
- Console.WriteLine($"Asset {assetId} has {assetDeliveryInfo.TotalVersions} versions!");
-
- Scraper.FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension;
- Scraper.CompressionLevel = Config.Default.CompressionLevel; // BZip2 and Zstd libraries automatically clamp the compression level
-
- // add to queue
- for (int i = 1; i <= assetDeliveryInfo.TotalVersions; i++)
- {
- Scraper.Assets.Enqueue(new AssetInput
- {
- Id = assetId,
- Version = i
- });
- }
+ Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!");
// set up titles
- SetAssetScraperTitle(assetId, 0, 0, assetDeliveryInfo.TotalVersions);
- Scraper.OnDownloadFinished += () => SetAssetScraperTitle(assetId, Scraper.SuccessfulDownloads, Scraper.FailedDownloads, assetDeliveryInfo.TotalVersions);
+ SetAssetScraperTitle(assetId, 0, 0, scraper.TotalVersions);
+ scraper.OnDownloadFinished += () => SetAssetScraperTitle(assetId, scraper.SuccessfulDownloads, scraper.FailedDownloads, scraper.TotalVersions);
// start workers
List workers = new List();
for (int i = 1; i <= Config.Default.Workers; i++)
- workers.Add(Task.Run(Scraper.StartWorker));
+ workers.Add(Task.Run(scraper.StartWorker));
Task.WaitAll(workers.ToArray());
// finalise
- Scraper.PrintDownloadStatistics();
- Scraper.WriteIndexFile($"{assetId} asset versions on {DateTime.Now.ToString("R")} ({assetDeliveryInfo.TotalVersions} versions)");
+ scraper.PrintDownloadStatistics();
+ // "R" always prints a GMT suffix and does not convert, so the value has to be UTC already
+ scraper.WriteIndexFile($"{assetId} asset versions on {DateTime.UtcNow.ToString("R")} ({scraper.TotalVersions} versions)");
}
}
}
\ No newline at end of file
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index a32b18b..55120cd 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -1,138 +1,84 @@
using RobloxUltimateScraper.Enums;
using RobloxUltimateScraper.Models;
-using System;
-using System.Collections.Generic;
using System.Diagnostics;
-using System.Linq;
using System.Net;
using System.Text;
using System.Text.Json;
-using System.Threading.Tasks;
namespace RobloxUltimateScraper
{
///
- /// The core
+ /// Asset ID scraper
///
- internal static class Scraper
+ internal class Scraper
{
///
- /// Assets to download
+ /// Successful or failed download event.
///
- public static Queue Assets { get; }
+ public delegate void DownloadFinished();
- ///
- /// File extension to be used for saving
- ///
- public static string? FileExtension { get; set; } = null;
+ private object _lock = new object();
- ///
- /// Should the CDN url be trimmed in the console output
- ///
- public static bool ShouldTrimCdnUrlInConsole { get; set; } = true;
+ public ulong AssetId { get; }
- ///
- /// Compression level
- ///
- public static int CompressionLevel { get; set; } = 0;
+ public int TotalVersions { get; private set; }
- ///
- /// Is index enabled
- ///
- public static bool IndexEnabled { get { return Config.Default.OutputType == OutputType.Index || Config.Default.OutputType == OutputType.Both; } }
+ public int CurrentVersion { get; private set; }
- ///
- /// Are files enabled
- ///
- public static bool FilesEnabled { get { return Config.Default.OutputType == OutputType.Files || Config.Default.OutputType == OutputType.Both; } }
+ public string OutputDirectory { get; }
///
- /// Is console only
+ /// File extension to be used for saving
///
- public static bool ConsoleOnly { get { return Config.Default.OutputType == OutputType.Console; } }
+ public string? FileExtension { get; set; } = null;
///
/// Versions that successfully downloaded
///
- public static int SuccessfulDownloads { get; private set; }
+ public int SuccessfulDownloads { get; private set; }
///
/// Versions that failed to download
///
- public static int FailedDownloads { get; private set; }
-
- ///
- /// Successful or failed download event.
- ///
- public delegate void DownloadFinished();
+ public int FailedDownloads { get; private set; }
///
/// Event that fires upon a successful or failed download.
///
- public static event DownloadFinished? OnDownloadFinished;
-
- ///
- /// singleton.
- ///
- private static HttpClient _HttpClient { get; }
-
- ///
- /// Http client cookies.
- ///
- private static CookieContainer _CookieContainer { get; }
+ public event DownloadFinished? OnDownloadFinished;
///
/// Index entries
///
- private static List _Index { get; }
+ private List _index = new List();
///
/// Initialises values used by
///
- static Scraper()
+ public Scraper(ulong assetId, string outputDirectory)
{
- Assets = new Queue();
-
- SuccessfulDownloads = 0;
- FailedDownloads = 0;
-
- _CookieContainer = new CookieContainer();
+ AssetId = assetId;
+ OutputDirectory = outputDirectory;
+ }
- string? cookie = null;
+ public struct SetupResult
+ {
+ public bool Success;
+ public string Message;
+ }
- if (!string.IsNullOrEmpty(Config.Default.AuthCookie))
- {
- Console.WriteLine("Using cookies from arguments.");
- cookie = Config.Default.AuthCookie;
- }
- else
- {
- string? envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE");
- if (!string.IsNullOrEmpty(envValue))
- {
- Console.WriteLine("Using cookies from environment variables.");
- cookie = envValue;
- }
- }
-
- if (cookie != null)
- _CookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}"));
+ /// <summary>
+ /// Fetches the asset's delivery information and prepares this scraper for downloading:
+ /// sets <see cref="TotalVersions"/> and <see cref="FileExtension"/> and creates the output directory.
+ /// </summary>
+ /// <returns>Success flag; on failure the message says why the versions could not be fetched.</returns>
+ public async Task<SetupResult> Setup()
+ {
+ var assetDeliveryInfo = await GetAssetDeliveryInformation();
+ if (!assetDeliveryInfo.Success)
+ return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" };
- HttpClientHandler httpClientHandler = new HttpClientHandler
- {
- AutomaticDecompression = DecompressionMethods.All,
- AllowAutoRedirect = false, // we are using v1 because v2 is bad
- CookieContainer = _CookieContainer,
- UseCookies = true
- };
+ TotalVersions = assetDeliveryInfo.TotalVersions;
- _HttpClient = new HttpClient(httpClientHandler)
- {
- Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout)
- };
- //_HttpClient.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinINet");
+ FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension;
+
+ // OutputDirectory can be empty (e.g. console-only output with no directory argument);
+ // Directory.CreateDirectory throws ArgumentException for an empty path, so skip it then
+ if (!string.IsNullOrEmpty(OutputDirectory))
+ Directory.CreateDirectory(OutputDirectory);
- _Index = new List();
+ return new SetupResult { Success = true };
}
///
@@ -141,10 +87,10 @@ static Scraper()
/// Asset Id
/// Asset Version (0 for latest)
/// Http response
- public static Task AssetRequest(long id, int version = 0)
+ public Task AssetRequest(int version = 0)
{
- string url = $"https://assetdelivery.{Config.Default.BaseUrl}/v1/asset/?id={id}&version={version}";
- return _HttpClient.GetAsync(url);
+ string url = $"https://assetdelivery.{Config.Default.BaseUrl}/v1/asset/?id={AssetId}&version={version}";
+ return Http.Client.GetAsync(url);
}
///
@@ -182,9 +128,9 @@ public struct AssetDeliveryInformation
///
/// Asset Id
/// Asset delivery information
- public static async Task GetAssetDeliveryInformation(long id)
+ public async Task GetAssetDeliveryInformation()
{
- HttpResponseMessage response = await AssetRequest(id);
+ HttpResponseMessage response = await AssetRequest();
if (response.StatusCode == HttpStatusCode.Conflict)
return new AssetDeliveryInformation { Success = false, Error = "Insufficient permissions to download asset" };
@@ -193,8 +139,8 @@ public static async Task GetAssetDeliveryInformation(l
return new AssetDeliveryInformation { Success = false, Error = $"Unhandled status code ({(int)response.StatusCode})" };
IEnumerable? values;
- int versions = 0;
- AssetType assetType = 0;
+ int versions;
+ AssetType assetType;
{
if (!response.Headers.TryGetValues("roblox-assetversionnumber", out values))
@@ -228,9 +174,9 @@ public static async Task GetAssetDeliveryInformation(l
/// Asset Id
/// Version (0 for latest)
/// Success, Error string, CDN url
- public static async Task<(bool, string, string)> GetCDNUrl(long id, int version = 0)
+ public async Task<(bool, string, string)> GetCdnUrl(int version = 0)
{
- HttpResponseMessage response = await AssetRequest(id, version);
+ HttpResponseMessage response = await AssetRequest(version);
if (response.StatusCode == HttpStatusCode.Conflict)
return (false, "Insufficient permissions to download asset", "");
@@ -252,9 +198,9 @@ public static async Task GetAssetDeliveryInformation(l
/// Id
/// Version
/// Asset output path
- public static string BuildAssetOutputFileName(long id, int version)
+ public string BuildAssetOutputFileName(int version)
{
- string fileName = id.ToString();
+ string fileName = AssetId.ToString();
if (version != 0)
fileName += $"-v{version}";
@@ -264,13 +210,12 @@ public static string BuildAssetOutputFileName(long id, int version)
///
/// Logs an asset to index
///
- /// Id
/// Version
/// CDN url
/// File size in Mb
/// Last modified
/// Error message
- private static void LogAsset(long id,
+ private void LogAsset(
int version,
string? cdnUrl = null,
double? fileSizeInMb = null,
@@ -279,7 +224,6 @@ private static void LogAsset(long id,
{
AssetOutput output = new AssetOutput
{
- Id = id,
Version = version,
CDNUrl = cdnUrl,
FileSizeInMb = fileSizeInMb,
@@ -287,26 +231,21 @@ private static void LogAsset(long id,
Error = error
};
- Console.WriteLine(output.ToString(trimCdnUrl: ShouldTrimCdnUrlInConsole));
+ Console.WriteLine(output.ToString(trimCdnUrl: Config.Default.TrimCdnUrlInConsole));
- _Index.Add(output);
+ _index.Add(output);
}
///
/// Logs an asset to index and saves it
///
/// Http response messsage
- /// Id
/// Version
/// CDN url
- private static async Task LogAssetFromCDNHttpMessageResponse(HttpResponseMessage response,
- long id,
+ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage response,
int version,
string cdnUrl)
{
- if (!ConsoleOnly)
- Directory.CreateDirectory(Config.Default.OutputDirectory);
-
// get last modified
string? lastModified = response.Content.Headers.GetValues("last-modified").FirstOrDefault();
@@ -316,20 +255,19 @@ private static async Task LogAssetFromCDNHttpMessageResponse(HttpResponseMessage
{
fileSize = Math.Round(stream.Length / 1024f / 1024f, 6);
- if (FilesEnabled)
+ if (Config.Default.OutputType.IsFileSavingEnabled())
{
- string outputName = BuildAssetOutputFileName(id, version);
- string path = Path.Combine(Config.Default.OutputDirectory, outputName);
+ string outputName = BuildAssetOutputFileName(version);
+ string path = Path.Combine(OutputDirectory, outputName);
string outputPath = FileWriter.BuildOutputFileName(path, FileExtension);
DateTime? lastModifiedDT = lastModified != null ? DateTime.Parse(lastModified) : null;
- FileWriter.Save(outputPath, stream, CompressionLevel, lastModifiedDT);
+ FileWriter.Save(outputPath, stream, Config.Default.CompressionLevel, lastModifiedDT);
}
}
LogAsset(
- id: id,
version: version,
cdnUrl: cdnUrl,
fileSizeInMb: fileSize,
@@ -340,7 +278,7 @@ private static async Task LogAssetFromCDNHttpMessageResponse(HttpResponseMessage
///
/// Increments and invokes .
///
- private static void FireAssetSuccess()
+ private void FireAssetSuccess()
{
SuccessfulDownloads++;
OnDownloadFinished?.Invoke();
@@ -349,7 +287,7 @@ private static void FireAssetSuccess()
///
/// Increments and invokes .
///
- private static void FireAssetFailed()
+ private void FireAssetFailed()
{
SuccessfulDownloads++;
OnDownloadFinished?.Invoke();
@@ -360,47 +298,48 @@ private static void FireAssetFailed()
///
/// Worker
// TODO: add try catch blocks. give 3 retries w/ exceptions
- public static async Task StartWorker()
+ public async Task StartWorker()
{
- while (Assets.Count > 0)
+ while (TotalVersions > CurrentVersion)
{
- AssetInput asset;
- lock (Assets)
+ int version;
+ lock (_lock)
{
- if (Assets.Count == 0)
+ if (TotalVersions <= CurrentVersion)
continue;
- asset = Assets.Dequeue();
+ CurrentVersion++;
+ version = CurrentVersion;
}
// get the url
- (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCDNUrl(asset.Id, asset.Version);
+ (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCdnUrl(version);
if (!cdnGetSuccess)
{
- LogAsset(error: $"Failed to fetch {asset.Id} v{asset.Version}: {cdnGetMessage}", id: asset.Id, version: asset.Version);
+ LogAsset(error: $"Failed to fetch {AssetId} v{version}: {cdnGetMessage}", version: version);
FireAssetFailed();
continue;
}
// download the asset
- HttpResponseMessage cdnResponse = await _HttpClient.GetAsync(cdnUrl);
+ HttpResponseMessage cdnResponse = await Http.Client.GetAsync(cdnUrl);
if (cdnResponse.StatusCode == HttpStatusCode.Forbidden)
{
- LogAsset(error: $"Failed to fetch {asset.Id} v{asset.Version} ({cdnUrl}): Asset not found on CDN", id: asset.Id, version: asset.Version);
+ LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Asset not found on CDN", version: version);
FireAssetFailed();
continue;
}
if (!IsSuccessStatusCode(cdnResponse.StatusCode))
{
- LogAsset(error: $"Failed to fetch {asset.Id} v{asset.Version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", id: asset.Id, version: asset.Version);
+ LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", version: version);
FireAssetFailed();
continue;
}
// save!
- await LogAssetFromCDNHttpMessageResponse(cdnResponse, asset.Id, asset.Version, cdnUrl);
+ await LogAssetFromCdnHttpMessageResponse(cdnResponse, version, cdnUrl);
FireAssetSuccess();
}
}
@@ -408,7 +347,7 @@ public static async Task StartWorker()
///
/// Prints download statistics
///
- public static void PrintDownloadStatistics()
+ public void PrintDownloadStatistics()
{
Console.WriteLine($"Successful Downloads: {SuccessfulDownloads}");
Console.WriteLine($"Failed Downloads: {FailedDownloads}");
@@ -419,15 +358,15 @@ public static void PrintDownloadStatistics()
/// Writes the index file
///
/// Index header
- public static void WriteIndexFile(string header)
+ public void WriteIndexFile(string header)
{
- if (!IndexEnabled)
+ if (!Config.Default.OutputType.IsIndexEnabled())
return;
- Directory.CreateDirectory(Config.Default.OutputDirectory);
+ Directory.CreateDirectory(OutputDirectory);
// sort index values
- _Index.Sort();
+ _index.Sort();
List indexPaths = new List();
@@ -438,12 +377,12 @@ public static void WriteIndexFile(string header)
builder.AppendLine(header);
- foreach (AssetOutput asset in _Index)
+ foreach (AssetOutput asset in _index)
builder.AppendLine(asset.ToString());
string contents = builder.ToString();
- string path = Path.Combine(Config.Default.OutputDirectory, "index.txt");
+ string path = Path.Combine(OutputDirectory, "index.txt");
indexPaths.Add(path);
File.WriteAllText(path, contents);
@@ -451,9 +390,9 @@ public static void WriteIndexFile(string header)
if (Config.Default.IndexType == IndexType.Json || Config.Default.IndexType == IndexType.All)
{
- string contents = JsonSerializer.Serialize(_Index);
+ string contents = JsonSerializer.Serialize(_index);
- string path = Path.Combine(Config.Default.OutputDirectory, "index.json");
+ string path = Path.Combine(OutputDirectory, "index.json");
indexPaths.Add(path);
File.WriteAllText(path, contents);
From 2194bd991c96faec977b51007b493cb35e5fe285 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sun, 17 Aug 2025 22:27:26 +0100
Subject: [PATCH 02/26] add range scraper
---
RobloxUltimateScraper/Program.cs | 93 +++++++++++++++++--
.../Properties/launchSettings.json | 4 +
RobloxUltimateScraper/RangeScraper.cs | 79 ++++++++++++++++
RobloxUltimateScraper/Scraper.cs | 37 +++++---
4 files changed, 193 insertions(+), 20 deletions(-)
create mode 100644 RobloxUltimateScraper/RangeScraper.cs
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index b033ba2..f5c505b 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -7,7 +7,7 @@ namespace RobloxUltimateScraper
{
internal class Program
{
- static async Task Main(string[] args)
+ static void Main(string[] args)
{
if (args.Length == 0) // make it display the help menu
args = new string[] { "--help" };
@@ -32,7 +32,6 @@ static void Run(CommandLineConfig config)
// TODO: add functionality for
// list
// list versions
- // range
switch (Config.Default.Scraper)
{
case ScraperType.None:
@@ -53,7 +52,7 @@ static void Run(CommandLineConfig config)
break;
case ScraperType.Range:
- Console.WriteLine("Range scraper has not been implemented yet.");
+ RunRangeScraper();
break;
default:
@@ -96,7 +95,7 @@ static void RunAssetScraper()
string outputDirectory;
- if (string.IsNullOrEmpty(Config.Default.OutputDirectory) && Config.Default.OutputType != OutputType.Console)
+ if (string.IsNullOrEmpty(Config.Default.OutputDirectory))
outputDirectory = $"Asset_{assetId}";
else
outputDirectory = Config.Default.OutputDirectory;
@@ -108,7 +107,7 @@ static void RunAssetScraper()
// set up titles
SetAssetScraperTitle(assetId, 0, 0, scraper.TotalVersions);
- scraper.OnDownloadFinished += () => SetAssetScraperTitle(assetId, scraper.SuccessfulDownloads, scraper.FailedDownloads, scraper.TotalVersions);
+ scraper.OnDownloadFinished += (_) => SetAssetScraperTitle(assetId, scraper.SuccessfulDownloads, scraper.FailedDownloads, scraper.TotalVersions);
// start workers
List workers = new List();
@@ -120,7 +119,89 @@ static void RunAssetScraper()
// finalise
scraper.PrintDownloadStatistics();
- scraper.WriteIndexFile($"{assetId} asset versions on {DateTime.Now.ToString("R")} ({scraper.TotalVersions} versions)");
+ scraper.WriteIndexFile();
+ }
+
+        class RangeScraperData // progress counters for a range run; plain fields so they can be passed by ref to Interlocked
+        {
+            public ulong StartRange = 0; // first ID of the range
+            public ulong EndRange = 0; // last ID of the range
+
+            public ulong TotalIds = 0; // number of IDs in the range
+            public int DownloadedIds = 0; // IDs processed so far; incremented on both finish and error
+            public int ErrorIds = 0; // IDs whose scraper setup failed
+
+            public int TotalVersions = 0; // running sum of the version counts reported per asset
+            public int DownloadedVersions = 0; // versions processed so far, successful or not
+            public int ErrorVersions = 0; // versions that failed to download
+        }
+
+        /// <summary>Refreshes the console title with ID and version progress once per second until the token is cancelled.</summary>
+        static async Task RangeScraperTitleLogic(RangeScraperData data, CancellationToken token)
+        {
+            while (!token.IsCancellationRequested)
+            {
+                Console.Title = $"RobloxUltimateScraper | Range {data.StartRange}-{data.EndRange} | " +
+                    $"{data.DownloadedIds}/{data.TotalIds} IDs ({data.ErrorIds} errors) | " +
+                    $"{data.DownloadedVersions}/{data.TotalVersions} Versions ({data.ErrorVersions} errors)"; // version progress uses the version counter, not the ID counter
+                try
+                {
+                    await Task.Delay(1000, token);
+                }
+                catch (TaskCanceledException)
+                {
+                    return; // cancelled mid-delay: normal shutdown, nothing left to update
+                }
+            }
+        }
+
+        /// <summary>Scrapes every asset ID in the configured range with the configured number of workers, then prints a summary.</summary>
+        static void RunRangeScraper()
+        {
+            string outputDirectory;
+            if (string.IsNullOrEmpty(Config.Default.OutputDirectory))
+                outputDirectory = $"Range_{DateTimeOffset.UtcNow.ToUnixTimeSeconds()}";
+            else
+                outputDirectory = Config.Default.OutputDirectory;
+
+            if (Config.Default.OutputType != OutputType.Console)
+                Directory.CreateDirectory(outputDirectory);
+
+            RangeScraperData data = new RangeScraperData()
+            {
+                StartRange = Config.Default.ScraperStartRange,
+                EndRange = Config.Default.ScraperEndRange,
+                TotalIds = Config.Default.ScraperEndRange - Config.Default.ScraperStartRange + 1 // RangeScraper visits both ends of the range
+            };
+
+            RangeScraper scraper = new RangeScraper(Config.Default.ScraperStartRange, Config.Default.ScraperEndRange, outputDirectory);
+
+            // set up titles (the Downloaded* counters count every processed item, failures included)
+            scraper.OnAssetDownloadFinished += (bool success) => { Interlocked.Increment(ref data.DownloadedVersions); if (!success) { Interlocked.Increment(ref data.ErrorVersions); } };
+            scraper.OnAssetVersionsDiscovered += (int versions) => { Interlocked.Add(ref data.TotalVersions, versions); };
+
+            scraper.OnAssetFinished += () => Interlocked.Increment(ref data.DownloadedIds);
+            scraper.OnAssetError += () => { Interlocked.Increment(ref data.DownloadedIds); Interlocked.Increment(ref data.ErrorIds); };
+
+            CancellationTokenSource cts = new CancellationTokenSource();
+            Task titleUpdateTask = Task.Run(() => RangeScraperTitleLogic(data, cts.Token));
+
+            // start workers
+            List<Task> workers = new List<Task>();
+            for (int i = 1; i <= Config.Default.Workers; i++)
+                workers.Add(Task.Run(scraper.StartWorker));
+
+            Task.WaitAll(workers.ToArray());
+            cts.Cancel();
+
+            // successes are derived: processed minus failed
+            Console.WriteLine($"ID Range: {data.StartRange}-{data.EndRange}");
+            Console.WriteLine($"Total IDs: {data.TotalIds}");
+            Console.WriteLine($"Successful ID Downloads: {data.DownloadedIds - data.ErrorIds}");
+            Console.WriteLine($"Failed ID Downloads: {data.ErrorIds}");
+            Console.WriteLine($"Total Versions: {data.TotalVersions}");
+            Console.WriteLine($"Successful Version Downloads: {data.DownloadedVersions - data.ErrorVersions}");
+            Console.WriteLine($"Failed Version Downloads: {data.ErrorVersions}");
}
}
}
\ No newline at end of file
diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json
index 178e2e5..fc68c1c 100644
--- a/RobloxUltimateScraper/Properties/launchSettings.json
+++ b/RobloxUltimateScraper/Properties/launchSettings.json
@@ -6,6 +6,10 @@
"RobloxUltimateScraper - Asset Scraper": {
"commandName": "Project",
"commandLineArgs": "-w 30\r\n-c Zstd\r\n--cl 15\r\n-d Crossroads\r\n-o Both\r\n-a 1818\r\n-i All"
+ },
+ "RobloxUltimateScraper - Range Scraper": {
+ "commandName": "Project",
+ "commandLineArgs": "-w 10\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All"
}
}
}
\ No newline at end of file
diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs
new file mode 100644
index 0000000..673c048
--- /dev/null
+++ b/RobloxUltimateScraper/RangeScraper.cs
@@ -0,0 +1,79 @@
+namespace RobloxUltimateScraper
+{
+    /// <summary>
+    /// Scrapes every asset ID in an inclusive range; workers claim IDs one at a time under a lock.
+    /// </summary>
+    internal class RangeScraper
+    {
+        public delegate void AssetDownloadFinished(bool success); // Once a version is finished downloading
+        public delegate void AssetVersionsDiscovered(int versions); // Once asset scraper returns total asset versions
+        public delegate void AssetFinished(); // Once an ID is finished downloading
+        public delegate void AssetError(); // Once asset scraper setup errors
+
+        public event AssetDownloadFinished? OnAssetDownloadFinished;
+        public event AssetVersionsDiscovered? OnAssetVersionsDiscovered;
+        public event AssetFinished? OnAssetFinished;
+        public event AssetError? OnAssetError;
+
+        public ulong StartRange { get; }
+        public ulong EndRange { get; }
+
+        public ulong CurrentId { get; private set; } // most recently claimed ID
+
+        public string OutputDirectory { get; }
+
+        private readonly object _lock = new object();
+        private ulong _remaining; // IDs not yet claimed; only touched under _lock
+
+        public RangeScraper(ulong startRange, ulong endRange, string outputDirectory)
+        {
+            StartRange = startRange;
+            EndRange = endRange;
+            OutputDirectory = outputDirectory;
+            CurrentId = unchecked(startRange - 1); // "nothing claimed yet" value kept for compatibility; wraps when startRange is 0
+            _remaining = endRange >= startRange ? endRange - startRange + 1 : 0; // counting avoids relying on the wrapped value above
+        }
+
+        /// <summary>Claims IDs until the range is exhausted, scraping each one into its own Asset_{id} directory.</summary>
+        public async Task StartWorker()
+        {
+            while (true)
+            {
+                ulong id;
+                lock (_lock)
+                {
+                    if (_remaining == 0)
+                        return;
+                    _remaining--;
+                    id = CurrentId = EndRange - _remaining; // first claim yields StartRange, last yields EndRange
+                }
+
+                string outputDirectory = Path.Combine(OutputDirectory, $"Asset_{id}");
+                if (Directory.Exists(outputDirectory) && File.Exists(Path.Combine(outputDirectory, "index.txt"))) // index.txt is an indication that the download was finished. does not work for non-index runs.
+                {
+                    Console.WriteLine($"Skipping {id} - already done. Delete the directory to redo the download.");
+                    OnAssetFinished?.Invoke();
+                    continue;
+                }
+
+                Scraper scraper = new Scraper(id, outputDirectory);
+                var result = await scraper.Setup();
+                if (!result.Success)
+                {
+                    Console.WriteLine($"Failed to download {id} ({result.Message})");
+                    OnAssetError?.Invoke();
+                    continue;
+                }
+                OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions);
+
+                scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success);
+                await scraper.StartWorker();
+                scraper.PrintDownloadStatistics();
+                scraper.WriteIndexFile();
+
+                Console.WriteLine($"{id} has been completed.");
+                OnAssetFinished?.Invoke();
+            }
+        }
+    }
+}
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 55120cd..dcc27f4 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -15,7 +15,7 @@ internal class Scraper
///
/// Successful or failed download event.
///
- public delegate void DownloadFinished();
+ public delegate void DownloadFinished(bool success);
private object _lock = new object();
@@ -32,15 +32,17 @@ internal class Scraper
///
public string? FileExtension { get; set; } = null;
+ private int _successfulDownloads;
///
/// Versions that successfully downloaded
///
- public int SuccessfulDownloads { get; private set; }
+ public int SuccessfulDownloads => _successfulDownloads;
+ private int _failedDownloads;
///
/// Versions that failed to download
///
- public int FailedDownloads { get; private set; }
+ public int FailedDownloads => _failedDownloads;
///
/// Event that fires upon a successful or failed download.
@@ -180,6 +182,8 @@ public async Task GetAssetDeliveryInformation()
if (response.StatusCode == HttpStatusCode.Conflict)
return (false, "Insufficient permissions to download asset", "");
+ else if (response.StatusCode == HttpStatusCode.Forbidden)
+ return (false, "Asset version has been deleted", "");
if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download
return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", "");
@@ -280,8 +284,8 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon
///
private void FireAssetSuccess()
{
- SuccessfulDownloads++;
- OnDownloadFinished?.Invoke();
+ Interlocked.Increment(ref _successfulDownloads);
+ OnDownloadFinished?.Invoke(true);
}
///
@@ -289,8 +293,8 @@ private void FireAssetSuccess()
///
private void FireAssetFailed()
{
- SuccessfulDownloads++;
- OnDownloadFinished?.Invoke();
+ Interlocked.Increment(ref _failedDownloads);
+ OnDownloadFinished?.Invoke(false);
}
///
@@ -330,8 +334,13 @@ public async Task StartWorker()
FireAssetFailed();
continue;
}
-
- if (!IsSuccessStatusCode(cdnResponse.StatusCode))
+ else if (cdnResponse.StatusCode == HttpStatusCode.TooManyRequests)
+ {
+ LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Too many requests", version: version);
+ FireAssetFailed();
+ continue;
+ }
+ else if (!IsSuccessStatusCode(cdnResponse.StatusCode))
{
LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", version: version);
FireAssetFailed();
@@ -349,16 +358,16 @@ public async Task StartWorker()
///
public void PrintDownloadStatistics()
{
- Console.WriteLine($"Successful Downloads: {SuccessfulDownloads}");
- Console.WriteLine($"Failed Downloads: {FailedDownloads}");
- Console.WriteLine($"Total Downloads: {SuccessfulDownloads + FailedDownloads}");
+ Console.WriteLine($"{AssetId} | Successful Downloads: {SuccessfulDownloads}");
+ Console.WriteLine($"{AssetId} | Failed Downloads: {FailedDownloads}");
+ Console.WriteLine($"{AssetId} | Total Downloads: {SuccessfulDownloads + FailedDownloads}");
}
///
/// Writes the index file
///
/// Index header
- public void WriteIndexFile(string header)
+ public void WriteIndexFile()
{
if (!Config.Default.OutputType.IsIndexEnabled())
return;
@@ -375,7 +384,7 @@ public void WriteIndexFile(string header)
// create index file contents
StringBuilder builder = new StringBuilder();
- builder.AppendLine(header);
+ builder.AppendLine($"{AssetId} asset versions on {DateTime.Now.ToString("R")} ({TotalVersions} versions)");
foreach (AssetOutput asset in _index)
builder.AppendLine(asset.ToString());
From 1d7025f056e41f5c59fba5d8eea2e5b00e4611e9 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sun, 17 Aug 2025 22:27:57 +0100
Subject: [PATCH 03/26] add back roblox user-agent
---
RobloxUltimateScraper/Http.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/Http.cs b/RobloxUltimateScraper/Http.cs
index a81be3d..3b3ffc7 100644
--- a/RobloxUltimateScraper/Http.cs
+++ b/RobloxUltimateScraper/Http.cs
@@ -41,7 +41,7 @@ static Http()
{
Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout)
};
- //_HttpClient.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinINet");
+ Client.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinInet");
}
}
}
From 17344aa1c9c254e80f4855bd5e1d63e8fa5265f3 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sun, 17 Aug 2025 22:30:06 +0100
Subject: [PATCH 04/26] update AssetType enum
---
RobloxUltimateScraper/Enums/AssetType.cs | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/RobloxUltimateScraper/Enums/AssetType.cs b/RobloxUltimateScraper/Enums/AssetType.cs
index d3006fb..be4bb62 100644
--- a/RobloxUltimateScraper/Enums/AssetType.cs
+++ b/RobloxUltimateScraper/Enums/AssetType.cs
@@ -1,8 +1,5 @@
namespace RobloxUltimateScraper.Enums
{
- ///
- /// Automatically generated, do not modify.
- ///
internal enum AssetType
{
Product = 0,
@@ -81,7 +78,10 @@ internal enum AssetType
CodeSnippet = 80,
AdsVideo = 81,
OtaUpdate = 82,
- Screenshot = 83
+ Screenshot = 83,
+ RuntimePropertySet = 84,
+ StorePreviewVideo = 85,
+ GamePreviewVideo = 86
}
internal static class AssetTypeEx
@@ -164,7 +164,10 @@ internal static class AssetTypeEx
[AssetType.CodeSnippet] = null,
[AssetType.AdsVideo] = null,
[AssetType.OtaUpdate] = null,
- [AssetType.Screenshot] = null
+ [AssetType.Screenshot] = null,
+ [AssetType.RuntimePropertySet] = null,
+ [AssetType.StorePreviewVideo] = null,
+ [AssetType.GamePreviewVideo] = null
};
public static string? GetExtension(this AssetType type)
From 1a7537e1685f7dd2af374ec3a10cefe2e5215d29 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sun, 17 Aug 2025 22:31:57 +0100
Subject: [PATCH 05/26] bump zstdsharp version
---
RobloxUltimateScraper/RobloxUltimateScraper.csproj | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
index 7195b27..5d7b5a4 100644
--- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj
+++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
@@ -1,4 +1,4 @@
-
+
Exe
@@ -12,7 +12,7 @@
-
+
From 0bde15bef061268ccee7557cb92c75ad2920971b Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Mon, 6 Oct 2025 21:53:58 +0100
Subject: [PATCH 06/26] update AssetType enum and extension map
---
RobloxUltimateScraper/Enums/AssetType.cs | 26 +++++++++++++-----------
RobloxUltimateScraper/Scraper.cs | 2 +-
2 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/RobloxUltimateScraper/Enums/AssetType.cs b/RobloxUltimateScraper/Enums/AssetType.cs
index be4bb62..7a0f1a6 100644
--- a/RobloxUltimateScraper/Enums/AssetType.cs
+++ b/RobloxUltimateScraper/Enums/AssetType.cs
@@ -2,7 +2,7 @@
{
internal enum AssetType
{
- Product = 0,
+ Unknown = 0,
Image = 1,
TShirt = 2,
Audio = 3,
@@ -81,14 +81,15 @@ internal enum AssetType
Screenshot = 83,
RuntimePropertySet = 84,
StorePreviewVideo = 85,
- GamePreviewVideo = 86
+ GamePreviewVideo = 86,
+ CreatorExperienceConfig = 87
}
internal static class AssetTypeEx
{
private static readonly Dictionary _extensionMap = new Dictionary()
{
- [AssetType.Product] = null,
+ [AssetType.Unknown] = null,
[AssetType.Image] = "png", // TODO: auto detect what type of image it is
[AssetType.TShirt] = "rbxm",
[AssetType.Audio] = "ogg",
@@ -119,7 +120,7 @@ internal static class AssetTypeEx
[AssetType.Package] = "txt",
[AssetType.YouTubeVideo] = null,
[AssetType.GamePass] = null,
- [AssetType.App] = "rbxm",
+ [AssetType.App] = "rbxl",
[AssetType.Code] = null,
[AssetType.Plugin] = "rbxm",
[AssetType.SolidModel] = "rbxm",
@@ -140,11 +141,11 @@ internal static class AssetTypeEx
[AssetType.SwimAnimation] = "rbxm",
[AssetType.WalkAnimation] = "rbxm",
[AssetType.PoseAnimation] = "rbxm",
- [AssetType.LocalizationTableManifest] = null,
- [AssetType.LocalizationTableTranslation] = null,
+ [AssetType.LocalizationTableManifest] = "json",
+ [AssetType.LocalizationTableTranslation] = "json",
[AssetType.EmoteAnimation] = "rbxm",
[AssetType.Video] = null,
- [AssetType.TexturePack] = null,
+ [AssetType.TexturePack] = "xml",
[AssetType.TShirtAccessory] = "rbxm",
[AssetType.ShirtAccessory] = "rbxm",
[AssetType.PantsAccessory] = "rbxm",
@@ -154,20 +155,21 @@ internal static class AssetTypeEx
[AssetType.LeftShoeAccessory] = "rbxm",
[AssetType.RightShoeAccessory] = "rbxm",
[AssetType.DressSkirtAccessory] = "rbxm",
- [AssetType.FontFamily] = null,
- [AssetType.FontFace] = null,
- [AssetType.MeshHiddenSurfaceRemoval] = null,
+ [AssetType.FontFamily] = "json",
+ [AssetType.FontFace] = "ttf",
+ [AssetType.MeshHiddenSurfaceRemoval] = "rbxm",
[AssetType.EyebrowAccessory] = "rbxm",
[AssetType.EyelashAccessory] = "rbxm",
[AssetType.MoodAnimation] = "rbxm",
[AssetType.DynamicHead] = "rbxm",
[AssetType.CodeSnippet] = null,
[AssetType.AdsVideo] = null,
- [AssetType.OtaUpdate] = null,
+ [AssetType.OtaUpdate] = "rbxm",
[AssetType.Screenshot] = null,
[AssetType.RuntimePropertySet] = null,
[AssetType.StorePreviewVideo] = null,
- [AssetType.GamePreviewVideo] = null
+ [AssetType.GamePreviewVideo] = null,
+ [AssetType.CreatorExperienceConfig] = null
};
public static string? GetExtension(this AssetType type)
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index dcc27f4..4c9c583 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -163,7 +163,7 @@ public async Task GetAssetDeliveryInformation()
if (!Enum.TryParse(versionsStr, out assetType))
{
Debug.Assert(false);
- assetType = AssetType.Product;
+ assetType = AssetType.Unknown;
}
}
From 2313f236b89e9d0dd195fe791c1e8089ce9f4b92 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Mon, 6 Oct 2025 21:54:52 +0100
Subject: [PATCH 07/26] fix last-modified crash
---
RobloxUltimateScraper/Scraper.cs | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 4c9c583..259d08f 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -251,7 +251,9 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon
string cdnUrl)
{
// get last modified
- string? lastModified = response.Content.Headers.GetValues("last-modified").FirstOrDefault();
+ string? lastModified = null;
+ if (response.Content.Headers.TryGetValues("last-modified", out IEnumerable? lastModifiedValues))
+ lastModified = lastModifiedValues.First();
double? fileSize = null;
From 3e20639ecc459b72bc3c92da022c065af51b32c2 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Mon, 6 Oct 2025 21:55:22 +0100
Subject: [PATCH 08/26] use doubles when calculating the file size
---
RobloxUltimateScraper/Scraper.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 259d08f..c021030 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -259,7 +259,7 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon
using (Stream stream = await response.Content.ReadAsStreamAsync())
{
- fileSize = Math.Round(stream.Length / 1024f / 1024f, 6);
+ fileSize = Math.Round(stream.Length / 1024.0 / 1024.0, 6);
if (Config.Default.OutputType.IsFileSavingEnabled())
{
From 3a849c29f7cf993a9e9ce9a1856b3c01a8b9d757 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Mon, 6 Oct 2025 21:57:45 +0100
Subject: [PATCH 09/26] remove documentation for removed arguments
---
RobloxUltimateScraper/Scraper.cs | 5 -----
1 file changed, 5 deletions(-)
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index c021030..27fe1e8 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -86,7 +86,6 @@ public async Task Setup()
///
/// Creates a request to https://assetdelivery.roblox.com/v1/asset/
///
- /// Asset Id
/// Asset Version (0 for latest)
/// Http response
public Task AssetRequest(int version = 0)
@@ -128,7 +127,6 @@ public struct AssetDeliveryInformation
///
/// Retrieves information from asset delivery
///
- /// Asset Id
/// Asset delivery information
public async Task GetAssetDeliveryInformation()
{
@@ -173,7 +171,6 @@ public async Task GetAssetDeliveryInformation()
///
/// Retrieves the CDN url from an asset id
///
- /// Asset Id
/// Version (0 for latest)
/// Success, Error string, CDN url
public async Task<(bool, string, string)> GetCdnUrl(int version = 0)
@@ -199,7 +196,6 @@ public async Task GetAssetDeliveryInformation()
///
/// Constructs the asset output path
///
- /// Id
/// Version
/// Asset output path
public string BuildAssetOutputFileName(int version)
@@ -368,7 +364,6 @@ public void PrintDownloadStatistics()
///
/// Writes the index file
///
- /// Index header
public void WriteIndexFile()
{
if (!Config.Default.OutputType.IsIndexEnabled())
From 10362ca3f2c6fb4ad74494dfc0cb09dcfa8574ac Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:45:47 +0100
Subject: [PATCH 10/26] update roblox auth loading
---
RobloxUltimateScraper/CommandLineConfig.cs | 12 +++
RobloxUltimateScraper/Config.cs | 6 ++
.../Enums/RobloxAuthStatus.cs | 25 +++++
RobloxUltimateScraper/Http.cs | 92 ++++++++++++++-----
RobloxUltimateScraper/Program.cs | 82 ++++++++++++++++-
5 files changed, 191 insertions(+), 26 deletions(-)
create mode 100644 RobloxUltimateScraper/Enums/RobloxAuthStatus.cs
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index e53ba6a..cd34edd 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -194,5 +194,17 @@ public string BaseUrl
[Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")]
public bool? TrimCdnUrlInConsole { get; set; }
+
+ ///
+ /// Disables checks responsible for checking if the current run is authenticated.
+ ///
+ [Option("disablerobloxauthchecks", Required = false, Default = false, HelpText = "Disables checks responsible for checking if the current run is authenticated.")]
+ public bool DisableRobloxAuthChecks { get; set; } = false;
+
+ ///
+ /// Should fail if the current run is unauthenticated?
+ ///
+ [Option("failifunauthenticated", Required = false, Default = false, HelpText = "Should fail if the current run is unauthenticated?")]
+ public bool FailIfUnauthenticated { get; set; } = false;
}
}
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index 6ed8bf9..90568ab 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -35,6 +35,12 @@ internal class Config
public bool TrimCdnUrlInConsole { get; }
+ ///
+ public bool DisableRobloxAuthChecks => _clConfig.DisableRobloxAuthChecks;
+
+ ///
+ public bool FailIfUnauthenticated => _clConfig.FailIfUnauthenticated;
+
public Config(CommandLineConfig config)
{
_clConfig = config;
diff --git a/RobloxUltimateScraper/Enums/RobloxAuthStatus.cs b/RobloxUltimateScraper/Enums/RobloxAuthStatus.cs
new file mode 100644
index 0000000..363505b
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/RobloxAuthStatus.cs
@@ -0,0 +1,25 @@
+namespace RobloxUltimateScraper.Enums
+{
+ internal enum RobloxAuthStatus
+ {
+ ///
+ /// The auth cookie being used is valid
+ ///
+ Authenticated,
+
+ ///
+ /// No authentication cookie is set
+ ///
+ Unauthenticated,
+
+ ///
+ /// The auth cookie being used is invalid
+ ///
+ InvalidAuth,
+
+ ///
+ /// An error occurred while validating
+ ///
+ Error
+ }
+}
diff --git a/RobloxUltimateScraper/Http.cs b/RobloxUltimateScraper/Http.cs
index 3b3ffc7..d5e709c 100644
--- a/RobloxUltimateScraper/Http.cs
+++ b/RobloxUltimateScraper/Http.cs
@@ -1,47 +1,89 @@
-using System.Net;
+using System.Diagnostics;
+using System.Net;
namespace RobloxUltimateScraper
{
internal static class Http
{
- public static HttpClient Client { get; }
+ private static CookieContainer _cookieContainer = null!;
- static Http()
- {
- CookieContainer cookieContainer = new CookieContainer();
- string? cookie = null;
+ ///
+ /// A singleton that can be shared across all threads
+ ///
+ public static HttpClient Client { get; private set; } = null!;
- if (!string.IsNullOrEmpty(Config.Default.AuthCookie))
- {
- Console.WriteLine("Using cookies from arguments.");
- cookie = Config.Default.AuthCookie;
- }
- else
- {
- string? envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE");
- if (!string.IsNullOrEmpty(envValue))
- {
- Console.WriteLine("Using cookies from environment variables.");
- cookie = envValue;
- }
- }
+ ///
+ /// Boolean that indicates if a .ROBLOSECURITY cookie has been set for this session
+ ///
+ public static bool HasRobloxAuth { get; private set; } = false;
- if (cookie != null)
- cookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}"));
+ ///
+ /// Creates a new with the appropriate settings for this scraper
+ ///
+ /// New HttpClient instance
+ public static HttpClient CreateClient()
+ {
+ Debug.Assert(_cookieContainer != null);
HttpClientHandler httpClientHandler = new HttpClientHandler
{
AutomaticDecompression = DecompressionMethods.All,
AllowAutoRedirect = false, // we are using v1 because v2 is bad
- CookieContainer = cookieContainer,
+ CookieContainer = _cookieContainer,
UseCookies = true
};
- Client = new HttpClient(httpClientHandler)
+ HttpClient client = new HttpClient(httpClientHandler)
{
Timeout = TimeSpan.FromSeconds(Config.Default.HttpTimeout)
};
- Client.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinInet");
+ client.DefaultRequestHeaders.Add("User-Agent", "Roblox/WinInet");
+
+ return client;
+ }
+
+ private static string? GetRobloxAuthCookie()
+ {
+ // Lookup order: Config.Default.AuthCookie, then ROBLOXULTIMATESCRAPER_COOKIE, then the file named by ROBLOXULTIMATESCRAPER_COOKIE_PATH; null when none is set.
+ if (!string.IsNullOrEmpty(Config.Default.AuthCookie))
+ {
+ Console.WriteLine("Using auth cookie from arguments.");
+ return Config.Default.AuthCookie;
+ }
+
+ string? envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE");
+ if (!string.IsNullOrEmpty(envValue))
+ {
+ Console.WriteLine("Using auth cookie from environment variables (ROBLOXULTIMATESCRAPER_COOKIE).");
+ return envValue;
+ }
+
+ envValue = Environment.GetEnvironmentVariable("ROBLOXULTIMATESCRAPER_COOKIE_PATH");
+ if (!string.IsNullOrEmpty(envValue))
+ {
+ Console.WriteLine("Using auth cookie from environment variables (ROBLOXULTIMATESCRAPER_COOKIE_PATH).");
+
+ if (!File.Exists(envValue))
+ throw new ApplicationException($"Can not read the auth cookie: File {envValue} does not exist.");
+
+ return File.ReadAllText(envValue).Trim(); // text files usually end with a newline; it is not part of the cookie value
+ }
+
+ return null;
+ }
+
+ public static void Init() // reads Config.Default, so it must run after Config.Initialise and before Client/CreateClient are used
+ {
+ _cookieContainer = new CookieContainer();
+
+ string? cookie = GetRobloxAuthCookie();
+ if (cookie != null)
+ {
+ _cookieContainer.Add(new Cookie(".ROBLOSECURITY", cookie, "/", $".{Config.Default.BaseUrl}")); // cookie domain is derived from the configured base url
+ HasRobloxAuth = true;
+ }
+
+ Client = CreateClient(); // CreateClient wires in _cookieContainer, so clients created later share the same cookie
+ }
}
}
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index f5c505b..b376ed7 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -1,6 +1,7 @@
using CommandLine;
using CommandLine.Text;
using RobloxUltimateScraper.Enums;
+using System.Net;
using System.Reflection;
namespace RobloxUltimateScraper
@@ -29,13 +30,27 @@ static void Run(CommandLineConfig config)
Config.Initialise(config);
+ try
+ {
+ Http.Init();
+ }
+ catch (Exception ex)
+ {
+ Console.WriteLine($"[ERROR]: Failed to initialise HTTP ({ex})");
+ Environment.Exit(1);
+ return;
+ }
+
+ if (!Config.Default.DisableRobloxAuthChecks)
+ CheckRobloxAuthStatus();
+
// TODO: add functionality for
// list
// list versions
switch (Config.Default.Scraper)
{
case ScraperType.None:
- Console.WriteLine("No scraper chosen.");
+ Console.WriteLine("No scraper selected.");
Console.WriteLine("Run the scraper with the --help argument for all commands.");
break;
@@ -71,6 +86,71 @@ static void Error(ParserResult config, IEnumerable err
Console.WriteLine(text);
}
+ static void CheckRobloxAuthStatus() // prints the auth state to the console and exits with code 1 when it is not acceptable
+ {
+ RobloxAuthStatus status = GetRobloxAuthStatus(out int httpCode, out Exception? exception);
+
+ switch (status)
+ {
+ case RobloxAuthStatus.Unauthenticated:
+ Console.WriteLine("[WARNING]: No authentication is set for this run. You may face problems downloading assets not authored by Roblox.");
+ break;
+
+ case RobloxAuthStatus.InvalidAuth:
+ Console.WriteLine("[ERROR]: Provided authentication cookie is invalid.");
+ Environment.Exit(1);
+ return; // not reached: Environment.Exit terminates the process
+
+ case RobloxAuthStatus.Error:
+ if (exception != null)
+ {
+ Console.WriteLine($"[WARNING]: Failed to check if authentication is valid ({exception.Message}).");
+ Console.WriteLine(exception);
+ }
+ else
+ {
+ Console.WriteLine($"[WARNING]: Failed to check if authentication is valid (got unexpected HTTP code {httpCode}).");
+ }
+ break;
+ // RobloxAuthStatus.Authenticated has no case: there is nothing to report.
+ }
+
+ if (status != RobloxAuthStatus.Authenticated && Config.Default.FailIfUnauthenticated) // turns the Unauthenticated/Error warnings above into a hard stop
+ {
+ Console.WriteLine("[ERROR]: Fail if unauthenticated flag is enabled. Stopping execution.");
+ Environment.Exit(1);
+ return;
+ }
+ }
+
+ static RobloxAuthStatus GetRobloxAuthStatus(out int httpCode, out Exception? exception) // httpCode: status received (0 if none); exception: what the request threw (else null)
+ {
+ httpCode = 0;
+ exception = null;
+
+ if (!Http.HasRobloxAuth)
+ return RobloxAuthStatus.Unauthenticated;
+
+ try
+ {
+ // The cookie is scoped to Config.Default.BaseUrl, so the check must target that domain or the cookie is never sent.
+ using HttpResponseMessage message = Http.Client.GetAsync($"https://users.{Config.Default.BaseUrl}/v1/users/authenticated").GetAwaiter().GetResult();
+ httpCode = (int)message.StatusCode;
+
+ return message.StatusCode switch
+ {
+ HttpStatusCode.OK => RobloxAuthStatus.Authenticated,
+ HttpStatusCode.Unauthorized => RobloxAuthStatus.InvalidAuth,
+ _ => RobloxAuthStatus.Error
+ };
+ }
+ catch (Exception ex)
+ {
+ exception = ex;
+ return RobloxAuthStatus.Error;
+ }
+ }
+
// TODO: move asset scraper to a separate file
///
From 4670cb8439c219114e321efd5c02685a42881eab Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:29:26 +0100
Subject: [PATCH 11/26] improve error handling in scraper
---
RobloxUltimateScraper/Scraper.cs | 91 ++++++++++++++++++++++----------
1 file changed, 64 insertions(+), 27 deletions(-)
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 27fe1e8..93071c5 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -175,22 +175,69 @@ public async Task GetAssetDeliveryInformation()
/// Success, Error string, CDN url
public async Task<(bool, string, string)> GetCdnUrl(int version = 0)
{
- HttpResponseMessage response = await AssetRequest(version);
+ try
+ {
+ HttpResponseMessage response = await AssetRequest(version);
- if (response.StatusCode == HttpStatusCode.Conflict)
- return (false, "Insufficient permissions to download asset", "");
- else if (response.StatusCode == HttpStatusCode.Forbidden)
- return (false, "Asset version has been deleted", "");
+ switch (response.StatusCode)
+ {
+ case HttpStatusCode.Conflict:
+ return (false, "Insufficient permissions to download asset", "");
- if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download
- return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", "");
+ case HttpStatusCode.Forbidden:
+ return (false, "Asset version has been deleted", "");
+
+ case HttpStatusCode.TooManyRequests:
+ return (false, "Too many requests", "");
+ }
+
+ if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download
+ return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", "");
+
+ if (!response.Headers.TryGetValues("Location", out IEnumerable? values))
+ return (false, "Location header is missing", ""); // this should never happen, but handle anyways
+
+ string location = values.First();
+
+ return (true, "Success", location);
+ }
+ catch (Exception ex)
+ {
+ return (false, ex.ToString(), "");
+ }
+ }
+
+ ///
+ /// Gets content from the CDN using a specified URL
+ ///
+ /// CDN Url
+ /// Success, Error Message, HttpResponseMessage
+ public async Task<(bool, string, HttpResponseMessage?)> GetCdnContent(string url)
+ {
+ try
+ {
+ HttpResponseMessage response = await Http.Client.GetAsync(url);
- if (!response.Headers.TryGetValues("Location", out IEnumerable? values))
- return (false, "Location header is missing", ""); // this should never happen, but handle anyways
+ switch (response.StatusCode)
+ {
+ case HttpStatusCode.Forbidden:
+ return (false, "Asset not found on CDN", null);
- string location = values.First();
+ case HttpStatusCode.TooManyRequests:
+ return (false, "Too many requests", null);
+
+ default:
+ if (!IsSuccessStatusCode(response.StatusCode))
+ return (false, $"Unknown status code ({(int)response.StatusCode})", null);
+ break;
+ }
- return (true, "Success", location);
+ return (true, "Success", response);
+ }
+ catch (Exception ex)
+ {
+ return (false, ex.ToString(), null);
+ }
}
///
@@ -324,29 +371,19 @@ public async Task StartWorker()
}
// download the asset
- HttpResponseMessage cdnResponse = await Http.Client.GetAsync(cdnUrl);
+ (bool cdnDownloadSuccess, string cdnDownloadMessage, HttpResponseMessage? cdnDownloadResponse) = await GetCdnContent(cdnUrl);
- if (cdnResponse.StatusCode == HttpStatusCode.Forbidden)
+ if (!cdnDownloadSuccess)
{
- LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Asset not found on CDN", version: version);
- FireAssetFailed();
- continue;
- }
- else if (cdnResponse.StatusCode == HttpStatusCode.TooManyRequests)
- {
- LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Too many requests", version: version);
- FireAssetFailed();
- continue;
- }
- else if (!IsSuccessStatusCode(cdnResponse.StatusCode))
- {
- LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): Unknown status code ({(int)cdnResponse.StatusCode})", version: version);
+ LogAsset(error: $"Failed to fetch {AssetId} v{version} ({cdnUrl}): {cdnDownloadMessage}", version: version);
FireAssetFailed();
continue;
}
+ Debug.Assert(cdnDownloadResponse != null);
+
// save!
- await LogAssetFromCdnHttpMessageResponse(cdnResponse, version, cdnUrl);
+ await LogAssetFromCdnHttpMessageResponse(cdnDownloadResponse, version, cdnUrl);
FireAssetSuccess();
}
}
From 678cc97d831258d1be59f653dcd033afdac35649 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:40:50 +0100
Subject: [PATCH 12/26] ability to use multiple httpclients
---
RobloxUltimateScraper/CommandLineConfig.cs | 7 +++++
RobloxUltimateScraper/Config.cs | 4 +++
.../Properties/launchSettings.json | 4 +--
RobloxUltimateScraper/Scraper.cs | 31 ++++++++++++-------
4 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index cd34edd..3aaa156 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -206,5 +206,12 @@ public string BaseUrl
///
[Option("failifunauthenticated", Required = false, Default = false, HelpText = "Should fail if the current run is unauthenticated?")]
public bool FailIfUnauthenticated { get; set; } = false;
+
+ [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.3.0 and before. Other name: --shc.")]
+ public bool SingleHttpClient { get; set; } = false;
+
+ // hack... again!!!
+ [Option("shc", Required = false, Hidden = true)]
+ public bool? SingleHttpClientOtherName { get; set; }
}
}
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index 90568ab..45a9ece 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -41,12 +41,16 @@ internal class Config
///
public bool FailIfUnauthenticated => _clConfig.FailIfUnauthenticated;
+ ///
+ public bool SingleHttpClient { get; }
+
public Config(CommandLineConfig config)
{
_clConfig = config;
CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg;
TrimCdnUrlInConsole = _clConfig.TrimCdnUrlInConsole ?? OutputType != OutputType.Console;
+ SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient;
}
public static void Initialise(CommandLineConfig commandLineConfig)
diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json
index fc68c1c..c1cf511 100644
--- a/RobloxUltimateScraper/Properties/launchSettings.json
+++ b/RobloxUltimateScraper/Properties/launchSettings.json
@@ -5,11 +5,11 @@
},
"RobloxUltimateScraper - Asset Scraper": {
"commandName": "Project",
- "commandLineArgs": "-w 30\r\n-c Zstd\r\n--cl 15\r\n-d Crossroads\r\n-o Both\r\n-a 1818\r\n-i All"
+ "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 15\r\n-d Crossroads\r\n-o Both\r\n-a 1818\r\n-i All"
},
"RobloxUltimateScraper - Range Scraper": {
"commandName": "Project",
- "commandLineArgs": "-w 10\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All"
+ "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All"
}
}
}
\ No newline at end of file
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 93071c5..0a964bd 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -71,7 +71,7 @@ public struct SetupResult
public async Task Setup()
{
- var assetDeliveryInfo = await GetAssetDeliveryInformation();
+ var assetDeliveryInfo = await GetAssetDeliveryInformation(Http.Client);
if (!assetDeliveryInfo.Success)
return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" };
@@ -86,12 +86,13 @@ public async Task Setup()
///
/// Creates a request to https://assetdelivery.roblox.com/v1/asset/
///
+ /// Http client
/// Asset Version (0 for latest)
/// Http response
- public Task AssetRequest(int version = 0)
+ public Task AssetRequest(HttpClient httpClient, int version = 0)
{
string url = $"https://assetdelivery.{Config.Default.BaseUrl}/v1/asset/?id={AssetId}&version={version}";
- return Http.Client.GetAsync(url);
+ return httpClient.GetAsync(url);
}
///
@@ -128,9 +129,9 @@ public struct AssetDeliveryInformation
/// Retrieves information from asset delivery
///
/// Asset delivery information
- public async Task GetAssetDeliveryInformation()
+ public async Task GetAssetDeliveryInformation(HttpClient httpClient)
{
- HttpResponseMessage response = await AssetRequest();
+ HttpResponseMessage response = await AssetRequest(httpClient);
if (response.StatusCode == HttpStatusCode.Conflict)
return new AssetDeliveryInformation { Success = false, Error = "Insufficient permissions to download asset" };
@@ -171,13 +172,14 @@ public async Task GetAssetDeliveryInformation()
///
/// Retrieves the CDN url from an asset id
///
+ /// Http client
/// Version (0 for latest)
/// Success, Error string, CDN url
- public async Task<(bool, string, string)> GetCdnUrl(int version = 0)
+ public async Task<(bool, string, string)> GetCdnUrl(HttpClient httpClient, int version = 0)
{
try
{
- HttpResponseMessage response = await AssetRequest(version);
+ HttpResponseMessage response = await AssetRequest(httpClient, version);
switch (response.StatusCode)
{
@@ -210,13 +212,14 @@ public async Task GetAssetDeliveryInformation()
///
/// Gets content from the CDN using a specified URL
///
+ /// Http client
/// CDN Url
/// Success, Error Message, HttpResponseMessage
- public async Task<(bool, string, HttpResponseMessage?)> GetCdnContent(string url)
+ public static async Task<(bool, string, HttpResponseMessage?)> GetCdnContent(HttpClient httpClient, string url)
{
try
{
- HttpResponseMessage response = await Http.Client.GetAsync(url);
+ HttpResponseMessage response = await httpClient.GetAsync(url);
switch (response.StatusCode)
{
@@ -349,6 +352,8 @@ private void FireAssetFailed()
// TODO: add try catch blocks. give 3 retries w/ exceptions
public async Task StartWorker()
{
+ HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient();
+
while (TotalVersions > CurrentVersion)
{
int version;
@@ -361,7 +366,7 @@ public async Task StartWorker()
}
// get the url
- (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCdnUrl(version);
+ (bool cdnGetSuccess, string cdnGetMessage, string cdnUrl) = await GetCdnUrl(httpClient, version);
if (!cdnGetSuccess)
{
@@ -371,7 +376,7 @@ public async Task StartWorker()
}
// download the asset
- (bool cdnDownloadSuccess, string cdnDownloadMessage, HttpResponseMessage? cdnDownloadResponse) = await GetCdnContent(cdnUrl);
+ (bool cdnDownloadSuccess, string cdnDownloadMessage, HttpResponseMessage? cdnDownloadResponse) = await GetCdnContent(httpClient, cdnUrl);
if (!cdnDownloadSuccess)
{
@@ -386,6 +391,10 @@ public async Task StartWorker()
await LogAssetFromCdnHttpMessageResponse(cdnDownloadResponse, version, cdnUrl);
FireAssetSuccess();
}
+
+ // only dispose if it isnt the global http client
+ if (httpClient != Http.Client)
+ httpClient.Dispose();
}
///
From 2b2f4307a72c07211a93c2fc60bd9b5687e4da09 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:40:55 +0100
Subject: [PATCH 13/26] bump version
---
RobloxUltimateScraper/RobloxUltimateScraper.csproj | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
index 5d7b5a4..7ca064b 100644
--- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj
+++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
@@ -5,7 +5,7 @@
net6.0
enable
enable
- 0.1.3.0
+ 0.1.4.0
false
From 729aa887c226a1d2872a98b4d8ef6cd7ad236c24 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:59:52 +0100
Subject: [PATCH 14/26] use .net 8.0
---
RobloxUltimateScraper/RobloxUltimateScraper.csproj | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
index 7ca064b..77fcbef 100644
--- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj
+++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
0.1.4.0
From 97c654402a8585e35b08c6b67d4fff556acacbe6 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 16:00:18 +0100
Subject: [PATCH 15/26] expanded trimcdnurl
---
RobloxUltimateScraper/CommandLineConfig.cs | 7 ++-
RobloxUltimateScraper/Config.cs | 4 +-
RobloxUltimateScraper/Enums/OutputType.cs | 2 +-
RobloxUltimateScraper/Enums/TrimCdnUrlType.cs | 51 +++++++++++++++++++
RobloxUltimateScraper/Scraper.cs | 4 +-
5 files changed, 61 insertions(+), 7 deletions(-)
create mode 100644 RobloxUltimateScraper/Enums/TrimCdnUrlType.cs
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index 3aaa156..fd05c43 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -192,8 +192,11 @@ public string BaseUrl
}
}
- [Option("trimcdnurlinconsole", Required = false, Default = null, HelpText = "Should the CDN url in console be trimmed.")]
- public bool? TrimCdnUrlInConsole { get; set; }
+ ///
+ /// Decides where CDN url parameters should be trimmed.
+ ///
+ [Option("trim", Required = false, Default = TrimCdnUrlType.All, HelpText = "Decides where CDN url parameters should be trimmed. (Off, Console, Output, All)")]
+ public TrimCdnUrlType TrimCdnUrl { get; set; } = TrimCdnUrlType.All;
///
/// Disables checks responsible for checking if the current run is authenticated.
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index 45a9ece..7ea58e5 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -33,7 +33,8 @@ internal class Config
public string BaseUrl => _clConfig.BaseUrl;
- public bool TrimCdnUrlInConsole { get; }
+ ///
+ public TrimCdnUrlType TrimCdnUrl => _clConfig.TrimCdnUrl;
///
public bool DisableRobloxAuthChecks => _clConfig.DisableRobloxAuthChecks;
@@ -49,7 +50,6 @@ public Config(CommandLineConfig config)
_clConfig = config;
CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg;
- TrimCdnUrlInConsole = _clConfig.TrimCdnUrlInConsole ?? OutputType != OutputType.Console;
SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient;
}
diff --git a/RobloxUltimateScraper/Enums/OutputType.cs b/RobloxUltimateScraper/Enums/OutputType.cs
index b3167f1..3cad91a 100644
--- a/RobloxUltimateScraper/Enums/OutputType.cs
+++ b/RobloxUltimateScraper/Enums/OutputType.cs
@@ -22,7 +22,7 @@ internal enum OutputType
IndexOnly = 1,
///
- /// Console output
+ /// Console output only
///
Console = 2,
diff --git a/RobloxUltimateScraper/Enums/TrimCdnUrlType.cs b/RobloxUltimateScraper/Enums/TrimCdnUrlType.cs
new file mode 100644
index 0000000..2930a89
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/TrimCdnUrlType.cs
@@ -0,0 +1,51 @@
+using System.Diagnostics;
+
+namespace RobloxUltimateScraper.Enums
+{
+ internal enum TrimCdnUrlType
+ {
+ ///
+ /// Turn off CDN url trimming entirely
+ ///
+ Off,
+
+ ///
+ /// Should trim the CDN url for Console only
+ ///
+ Console,
+
+ ///
+ /// Should trim the CDN url for Output only
+ ///
+ Output,
+
+ ///
+ /// Should trim the CDN url for both Console and Output
+ ///
+ All
+ }
+
+ internal static class TrimCdnUrlTypeEx // extension helpers for TrimCdnUrlType
+ {
+ public static bool ShouldTrim(this TrimCdnUrlType type, OutputType outputType) // outputType: the sink about to be written
+ {
+ if (type == TrimCdnUrlType.Off)
+ return false;
+ if (type == TrimCdnUrlType.All)
+ return true;
+
+ switch (outputType) // only Console or Output remain here; each trims just its own sink
+ {
+ case OutputType.Console:
+ return type == TrimCdnUrlType.Console;
+
+ case OutputType.Index:
+ return type == TrimCdnUrlType.Output;
+
+ default: // any other OutputType value is unexpected here
+ Debug.Assert(false);
+ return false; // reached when the assert is compiled out or continued past: leave the url untrimmed
+ }
+ }
+ }
+}
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 0a964bd..81e7831 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -281,7 +281,7 @@ private void LogAsset(
Error = error
};
- Console.WriteLine(output.ToString(trimCdnUrl: Config.Default.TrimCdnUrlInConsole));
+ Console.WriteLine(output.ToString(trimCdnUrl: Config.Default.TrimCdnUrl.ShouldTrim(OutputType.Console)));
_index.Add(output);
}
@@ -430,7 +430,7 @@ public void WriteIndexFile()
builder.AppendLine($"{AssetId} asset versions on {DateTime.Now.ToString("R")} ({TotalVersions} versions)");
foreach (AssetOutput asset in _index)
- builder.AppendLine(asset.ToString());
+ builder.AppendLine(asset.ToString(trimCdnUrl: Config.Default.TrimCdnUrl.ShouldTrim(OutputType.Index)));
string contents = builder.ToString();
From 90da67d245331abb0a87809521bd8b3ac5b36395 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 16:00:24 +0100
Subject: [PATCH 16/26] Revert "bump version"
This reverts commit 2b2f4307a72c07211a93c2fc60bd9b5687e4da09.
---
RobloxUltimateScraper/RobloxUltimateScraper.csproj | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/RobloxUltimateScraper.csproj b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
index 77fcbef..24f9b5c 100644
--- a/RobloxUltimateScraper/RobloxUltimateScraper.csproj
+++ b/RobloxUltimateScraper/RobloxUltimateScraper.csproj
@@ -5,7 +5,7 @@
net8.0
enable
enable
- 0.1.4.0
+ 0.1.3.0
false
From ae29b69482342ebbfe46dde351ca0c0dce48401a Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 16:01:44 +0100
Subject: [PATCH 17/26] Update ci.yml
---
.github/workflows/ci.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 68ca551..b03e770 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
submodules: true
- uses: actions/setup-dotnet@v4
with:
- dotnet-version: '6.x'
+ dotnet-version: '8.x'
- name: Restore dependencies
run: dotnet restore
- name: Build
@@ -32,4 +32,4 @@ jobs:
with:
name: RobloxUltimateScraper (${{ matrix.configuration }}, ${{ matrix.platform }})
path: |
- ./RobloxUltimateScraper/bin/${{ matrix.configuration }}/net6.0/${{ matrix.platform }}/publish/*
\ No newline at end of file
+ ./RobloxUltimateScraper/bin/${{ matrix.configuration }}/net8.0/${{ matrix.platform }}/publish/*
\ No newline at end of file
From 92e0a6f2156af55515ab996465c2540d4edc8f2d Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 16:07:12 +0100
Subject: [PATCH 18/26] remove "WIP!" from range argument documentation
---
RobloxUltimateScraper/CommandLineConfig.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index fd05c43..67b334b 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -83,7 +83,7 @@ public string UseListVersionsScraper
/// Use the asset range scraper.
/// COMMAND LINE USE ONLY!
///
- [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID]. WIP!")]
+ [Option('r', "range", Required = false, HelpText = "Use the asset range scraper. Parameter takes in [Start ID]-[End ID].")]
public string UseRangeScraper
{
set
From 5dbbe4373eb8b5af92e6bfd60208cf658d262588 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 19:24:46 +0100
Subject: [PATCH 19/26] allow workers to be started with a specific httpclient
should result in fewer HttpClients being created overall with the range scraper now
---
RobloxUltimateScraper/Program.cs | 2 +-
RobloxUltimateScraper/RangeScraper.cs | 22 +++++++++++++++++++---
RobloxUltimateScraper/Scraper.cs | 26 ++++++++++++++++++--------
3 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index b376ed7..ba69648 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -181,7 +181,7 @@ static void RunAssetScraper()
outputDirectory = Config.Default.OutputDirectory;
Scraper scraper = new Scraper(assetId, outputDirectory);
- scraper.Setup().Wait();
+ scraper.Setup(Http.Client).Wait();
Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!");
diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs
index 673c048..a4f8d91 100644
--- a/RobloxUltimateScraper/RangeScraper.cs
+++ b/RobloxUltimateScraper/RangeScraper.cs
@@ -34,7 +34,7 @@ public RangeScraper(ulong startRange, ulong endRange, string outputDirectory)
OutputDirectory = outputDirectory;
}
- public async Task StartWorker()
+ public async Task StartWorker(HttpClient httpClient)
{
while (EndRange > CurrentId)
{
@@ -56,7 +56,7 @@ public async Task StartWorker()
}
Scraper scraper = new Scraper(id, outputDirectory);
- var result = await scraper.Setup();
+ var result = await scraper.Setup(httpClient);
if (!result.Success)
{
Console.WriteLine($"Failed to download {id} ({result.Message})");
@@ -66,7 +66,7 @@ public async Task StartWorker()
OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions);
scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success);
- await scraper.StartWorker();
+ await scraper.StartWorker(httpClient);
scraper.PrintDownloadStatistics();
scraper.WriteIndexFile();
@@ -75,5 +75,21 @@ public async Task StartWorker()
OnAssetFinished?.Invoke();
}
}
+
+ public async Task StartWorker()
+ {
+ HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient();
+
+ try
+ {
+ await StartWorker(httpClient);
+ }
+ finally
+ {
+ // only dispose if it isnt the global http client
+ if (httpClient != Http.Client)
+ httpClient.Dispose();
+ }
+ }
}
}
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 81e7831..92f6fd5 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -69,9 +69,9 @@ public struct SetupResult
public string Message;
}
- public async Task Setup()
+ public async Task Setup(HttpClient httpClient)
{
- var assetDeliveryInfo = await GetAssetDeliveryInformation(Http.Client);
+ var assetDeliveryInfo = await GetAssetDeliveryInformation(httpClient);
if (!assetDeliveryInfo.Success)
return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" };
@@ -350,10 +350,8 @@ private void FireAssetFailed()
///
/// Worker
// TODO: add try catch blocks. give 3 retries w/ exceptions
- public async Task StartWorker()
+ public async Task StartWorker(HttpClient httpClient)
{
- HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient();
-
while (TotalVersions > CurrentVersion)
{
int version;
@@ -391,10 +389,22 @@ public async Task StartWorker()
await LogAssetFromCdnHttpMessageResponse(cdnDownloadResponse, version, cdnUrl);
FireAssetSuccess();
}
+ }
- // only dispose if it isnt the global http client
- if (httpClient != Http.Client)
- httpClient.Dispose();
+ public async Task StartWorker()
+ {
+ HttpClient httpClient = Config.Default.SingleHttpClient ? Http.Client : Http.CreateClient();
+
+ try
+ {
+ await StartWorker(httpClient);
+ }
+ finally
+ {
+ // only dispose if it isnt the global http client
+ if (httpClient != Http.Client)
+ httpClient.Dispose();
+ }
}
///
From 64fdea2f6cd37fe36762f71654e5a91600ddd558 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Tue, 7 Oct 2025 19:37:29 +0100
Subject: [PATCH 20/26] stop automatically compressing images when using range
scraper
---
RobloxUltimateScraper/CommandLineConfig.cs | 9 ++++++++
RobloxUltimateScraper/Config.cs | 3 +++
RobloxUltimateScraper/FileWriter.cs | 4 ++--
RobloxUltimateScraper/RangeScraper.cs | 9 +++++++-
RobloxUltimateScraper/Scraper.cs | 26 +++++++++++++++++++++-
5 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index 67b334b..6cd4648 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -210,11 +210,20 @@ public string BaseUrl
[Option("failifunauthenticated", Required = false, Default = false, HelpText = "Should fail if the current run is unauthenticated?")]
public bool FailIfUnauthenticated { get; set; } = false;
+ ///
+ /// Should only use a single and shared HTTP client for all scraper threads.
+ ///
[Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.3.0 and before. Other name: --shc.")]
public bool SingleHttpClient { get; set; } = false;
// hack... again!!!
[Option("shc", Required = false, Hidden = true)]
public bool? SingleHttpClientOtherName { get; set; }
+
+ ///
+ /// Should images gathered using the range scraper be compressed?
+ ///
+ [Option("compressimages", Required = false, Default = false, HelpText = "Should images gathered using the range scraper be compressed?")]
+ public bool CompressImages { get; set; } = false;
}
}
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index 7ea58e5..3772496 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -45,6 +45,9 @@ internal class Config
///
public bool SingleHttpClient { get; }
+ ///
+ public bool CompressImages => _clConfig.CompressImages;
+
public Config(CommandLineConfig config)
{
_clConfig = config;
diff --git a/RobloxUltimateScraper/FileWriter.cs b/RobloxUltimateScraper/FileWriter.cs
index 5ebcb09..dbb0e20 100644
--- a/RobloxUltimateScraper/FileWriter.cs
+++ b/RobloxUltimateScraper/FileWriter.cs
@@ -23,11 +23,11 @@ public static string BuildOutputFileName(string fileName, string? fileExtension)
/// File path
/// Stream
/// Last modified
- public static void Save(string filePath, Stream stream, int compressionLevel, DateTime? lastModified = null)
+ public static void Save(string filePath, Stream stream, CompressionType compressionType, int compressionLevel, DateTime? lastModified = null)
{
using (MemoryStream ms = new MemoryStream())
{
- switch (Config.Default.CompressionType)
+ switch (compressionType)
{
case CompressionType.GZip:
ICSharpCode.SharpZipLib.GZip.GZip.Compress(stream, ms, false);
diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs
index a4f8d91..a34d0b7 100644
--- a/RobloxUltimateScraper/RangeScraper.cs
+++ b/RobloxUltimateScraper/RangeScraper.cs
@@ -1,4 +1,6 @@
-namespace RobloxUltimateScraper
+using RobloxUltimateScraper.Enums;
+
+namespace RobloxUltimateScraper
{
///
/// Range scraper
@@ -66,6 +68,11 @@ public async Task StartWorker(HttpClient httpClient)
OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions);
scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success);
+
+ // do not bother compressing images gathered using this scraper (unless specified otherwise by the config)
+ if (scraper.AssetType == AssetType.Image && !Config.Default.CompressImages)
+ scraper.ShouldCompress = false;
+
await scraper.StartWorker(httpClient);
scraper.PrintDownloadStatistics();
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 92f6fd5..1d42e80 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -19,12 +19,29 @@ internal class Scraper
private object _lock = new object();
+ ///
+ /// Asset ID being downloaded
+ ///
public ulong AssetId { get; }
+ ///
+ /// Asset type of the asset
+ ///
+ public AssetType AssetType { get; private set; } = AssetType.Unknown;
+
+ ///
+ /// Total versions present in this asset
+ ///
public int TotalVersions { get; private set; }
+ ///
+ /// Most recent version being downloaded
+ ///
public int CurrentVersion { get; private set; }
+ ///
+ /// Directory where to output the files to
+ ///
public string OutputDirectory { get; }
///
@@ -49,6 +66,11 @@ internal class Scraper
///
public event DownloadFinished? OnDownloadFinished;
+ ///
+ /// Should compress the files downloaded?
+ ///
+ public bool ShouldCompress { get; set; } = true;
+
///
/// Index entries
///
@@ -75,6 +97,7 @@ public async Task Setup(HttpClient httpClient)
if (!assetDeliveryInfo.Success)
return new SetupResult { Success = false, Message = $"Failed to fetch versions for asset {AssetId}: {assetDeliveryInfo.Error}" };
+ AssetType = assetDeliveryInfo.AssetType;
TotalVersions = assetDeliveryInfo.TotalVersions;
FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension;
@@ -315,7 +338,8 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon
DateTime? lastModifiedDT = lastModified != null ? DateTime.Parse(lastModified) : null;
- FileWriter.Save(outputPath, stream, Config.Default.CompressionLevel, lastModifiedDT);
+ CompressionType compressionType = ShouldCompress ? Config.Default.CompressionType : CompressionType.None;
+ FileWriter.Save(outputPath, stream, compressionType, Config.Default.CompressionLevel, lastModifiedDT);
}
}
From 87dd5ef4e618974d02837d46c26f3d895879bf7b Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sat, 11 Oct 2025 21:35:58 +0100
Subject: [PATCH 21/26] allow file structure to be changed for range scraper
---
RobloxUltimateScraper/CommandLineConfig.cs | 12 +++-
RobloxUltimateScraper/Config.cs | 6 +-
.../Enums/MultipleDownloadStructureType.cs | 20 ++++++
.../Properties/launchSettings.json | 2 +-
RobloxUltimateScraper/RangeScraper.cs | 9 +--
RobloxUltimateScraper/Scraper.cs | 71 +++++++++++++++++--
6 files changed, 107 insertions(+), 13 deletions(-)
create mode 100644 RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index 6cd4648..3429fb7 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -213,7 +213,7 @@ public string BaseUrl
///
/// Should only use a single and shared HTTP client for all scraper threads.
///
- [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.3.0 and before. Other name: --shc.")]
+ [Option("singlehttpclient", Required = false, Default = false, HelpText = "Should only use a single and shared HTTP client for all scraper threads. This was the behaviour present in 0.1.2.0 and before. Other name: --shc.")]
public bool SingleHttpClient { get; set; } = false;
// hack... again!!!
@@ -225,5 +225,15 @@ public string BaseUrl
///
[Option("compressimages", Required = false, Default = false, HelpText = "Should images gathered using the range scraper be compressed?")]
public bool CompressImages { get; set; } = false;
+
+ ///
+ /// The file structure to use for range and list scrapes. This will not apply to single asset downloads.
+ ///
+ [Option("multipledownloadstructure", Required = false, Default = MultipleDownloadStructureType.Default, HelpText = "The file structure to use for range & list scrapes. This will not apply to single asset downloads. Other name: --mds.")]
+ public MultipleDownloadStructureType MultipleDownloadStructure { get; set; } = MultipleDownloadStructureType.Default;
+
+ // hack... again!!!
+ [Option("mds", Required = false, Hidden = true)]
+ public MultipleDownloadStructureType? MultipleDownloadStructureOtherName { get; set; }
}
}
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index 3772496..da269ed 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -6,7 +6,7 @@ internal class Config
{
public static Config Default { get; private set; } = null!;
- private CommandLineConfig _clConfig;
+ private readonly CommandLineConfig _clConfig;
public ulong ScraperAssetId => _clConfig.ScraperAssetId;
@@ -48,12 +48,16 @@ internal class Config
///
public bool CompressImages => _clConfig.CompressImages;
+ ///
+ public MultipleDownloadStructureType MultipleDownloadStructure { get; }
+
public Config(CommandLineConfig config)
{
_clConfig = config;
CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg;
SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient;
+ MultipleDownloadStructure = _clConfig.MultipleDownloadStructureOtherName ?? _clConfig.MultipleDownloadStructure;
}
public static void Initialise(CommandLineConfig commandLineConfig)
diff --git a/RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs b/RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs
new file mode 100644
index 0000000..7282f99
--- /dev/null
+++ b/RobloxUltimateScraper/Enums/MultipleDownloadStructureType.cs
@@ -0,0 +1,20 @@
+namespace RobloxUltimateScraper.Enums
+{
+ internal enum MultipleDownloadStructureType
+ {
+ ///
+ /// Every asset gets its own child directory inside the output directory
+ ///
+ Default,
+
+ ///
+ /// Asset files and index files of every asset share the output directory; index files are named "{AssetId}_index"
+ ///
+ Combined,
+
+ ///
+ /// Asset files go into a "Files" subdirectory and index files (named "{AssetId}_index") go into an "Indexes" subdirectory
+ ///
+ Separated
+ }
+}
diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json
index c1cf511..7620829 100644
--- a/RobloxUltimateScraper/Properties/launchSettings.json
+++ b/RobloxUltimateScraper/Properties/launchSettings.json
@@ -9,7 +9,7 @@
},
"RobloxUltimateScraper - Range Scraper": {
"commandName": "Project",
- "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All"
+ "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All\r\n--mds Separated"
}
}
}
\ No newline at end of file
diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs
index a34d0b7..eea9a5f 100644
--- a/RobloxUltimateScraper/RangeScraper.cs
+++ b/RobloxUltimateScraper/RangeScraper.cs
@@ -49,15 +49,16 @@ public async Task StartWorker(HttpClient httpClient)
id = CurrentId;
}
- string outputDirectory = Path.Combine(OutputDirectory, $"Asset_{id}");
- if (Directory.Exists(outputDirectory) && File.Exists(Path.Combine(outputDirectory, "index.txt"))) // index.txt is an indication that the download was finished. does not work for non-index runs.
+ Scraper scraper = new Scraper(id, OutputDirectory);
+ scraper.SetupMultipleDownloadStructure(Config.Default.MultipleDownloadStructure, $"Asset_{id}");
+
+ if (File.Exists(scraper.GetIndexFilePath())) // index.txt is an indication that the download was finished. does not work for non-index runs.
{
- Console.WriteLine($"Skipping {id} - already done. Delete the directory to redo the download.");
+ Console.WriteLine($"Skipping {id} - already done. Delete the file/directory to redo the download.");
OnAssetFinished?.Invoke();
continue;
}
- Scraper scraper = new Scraper(id, outputDirectory);
var result = await scraper.Setup(httpClient);
if (!result.Success)
{
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 1d42e80..6afdb4c 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -17,7 +17,7 @@ internal class Scraper
///
public delegate void DownloadFinished(bool success);
- private object _lock = new object();
+ private readonly object _lock = new object();
///
/// Asset ID being downloaded
@@ -42,7 +42,17 @@ internal class Scraper
///
/// Directory where to output the files to
///
- public string OutputDirectory { get; }
+ public string OutputDirectory { get; set; }
+
+ ///
+ /// Directory to output index files to. When set, this value is prioritised over OutputDirectory.
+ ///
+ public string? IndexOutputDirectory { get; set; }
+
+ ///
+ /// Directory to output asset files to. When set, this value is prioritised over OutputDirectory.
+ ///
+ public string? FilesOutputDirectory { get; set; }
///
/// File extension to be used for saving
@@ -76,6 +86,11 @@ internal class Scraper
///
private List _index = new List();
+ ///
+ /// Name of the output index files
+ ///
+ private string _indexName = "index";
+
///
/// Initialises values used by
///
@@ -91,6 +106,31 @@ public struct SetupResult
public string Message;
}
+ ///
+ /// Adjusts the output directories and the index file name for multiple download scrapers.
+ ///
+ /// Name of the child directory; only used by the Default structure
+ /// Directory structure type that decides which paths and names are changed
+ public void SetupMultipleDownloadStructure(MultipleDownloadStructureType type, string childDirectoryName)
+ {
+ switch (type)
+ {
+ case MultipleDownloadStructureType.Default: // everything goes into a per-asset child directory
+ OutputDirectory = Path.Combine(OutputDirectory, childDirectoryName);
+ break;
+
+ case MultipleDownloadStructureType.Combined: // directories stay as they are; only the index name gains the asset ID
+ _indexName = $"{AssetId}_index";
+ break;
+
+ case MultipleDownloadStructureType.Separated: // index and asset files are redirected to sibling subdirectories
+ _indexName = $"{AssetId}_index";
+ IndexOutputDirectory = Path.Combine(OutputDirectory, "Indexes");
+ FilesOutputDirectory = Path.Combine(OutputDirectory, "Files");
+ break;
+ }
+ }
+
public async Task Setup(HttpClient httpClient)
{
var assetDeliveryInfo = await GetAssetDeliveryInformation(httpClient);
@@ -101,8 +141,16 @@ public async Task Setup(HttpClient httpClient)
TotalVersions = assetDeliveryInfo.TotalVersions;
FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension;
+
+ // create all the directories we need
Directory.CreateDirectory(OutputDirectory);
+ if (!string.IsNullOrEmpty(IndexOutputDirectory))
+ Directory.CreateDirectory(IndexOutputDirectory);
+
+ if (!string.IsNullOrEmpty(FilesOutputDirectory))
+ Directory.CreateDirectory(FilesOutputDirectory);
+
return new SetupResult { Success = true };
}
@@ -319,6 +367,8 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon
int version,
string cdnUrl)
{
+ string outputDir = FilesOutputDirectory ?? OutputDirectory;
+
// get last modified
string? lastModified = null;
if (response.Content.Headers.TryGetValues("last-modified", out IEnumerable? lastModifiedValues))
@@ -333,7 +383,7 @@ private async Task LogAssetFromCdnHttpMessageResponse(HttpResponseMessage respon
if (Config.Default.OutputType.IsFileSavingEnabled())
{
string outputName = BuildAssetOutputFileName(version);
- string path = Path.Combine(OutputDirectory, outputName);
+ string path = Path.Combine(outputDir, outputName);
string outputPath = FileWriter.BuildOutputFileName(path, FileExtension);
DateTime? lastModifiedDT = lastModified != null ? DateTime.Parse(lastModified) : null;
@@ -449,7 +499,9 @@ public void WriteIndexFile()
if (!Config.Default.OutputType.IsIndexEnabled())
return;
- Directory.CreateDirectory(OutputDirectory);
+ string outputDir = IndexOutputDirectory ?? OutputDirectory;
+
+ Directory.CreateDirectory(outputDir);
// sort index values
_index.Sort();
@@ -468,7 +520,7 @@ public void WriteIndexFile()
string contents = builder.ToString();
- string path = Path.Combine(OutputDirectory, "index.txt");
+ string path = Path.Combine(outputDir, $"{_indexName}.txt");
indexPaths.Add(path);
File.WriteAllText(path, contents);
@@ -478,7 +530,7 @@ public void WriteIndexFile()
{
string contents = JsonSerializer.Serialize(_index);
- string path = Path.Combine(OutputDirectory, "index.json");
+ string path = Path.Combine(outputDir, $"{_indexName}.json");
indexPaths.Add(path);
File.WriteAllText(path, contents);
@@ -487,5 +539,12 @@ public void WriteIndexFile()
// write information about index
Console.WriteLine($"Index file(s) can be found at {string.Join(", ", indexPaths)}");
}
+
+ ///
+ /// Gets the path of the index file, located in IndexOutputDirectory when it is set and in OutputDirectory otherwise
+ ///
+ /// Whether to return the path of the JSON variation of the index instead of the text one
+ /// Index file path
+ public string GetIndexFilePath(bool json = false) => Path.Combine(IndexOutputDirectory ?? OutputDirectory, $"{_indexName}.{(json ? "json" : "txt")}");
}
}
From 3d207a4db3cb2617f16e15004c2c7c661964e037 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sat, 11 Oct 2025 21:46:53 +0100
Subject: [PATCH 22/26] handle recent assetdelivery api changes
---
RobloxUltimateScraper/Scraper.cs | 37 ++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index 6afdb4c..a23a230 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -187,6 +187,29 @@ public static bool IsSuccessStatusCode(HttpStatusCode code, bool allowForbidden
}
}
+ ///
+ /// Gets the status code from an assetdelivery response.
+ /// A 403 response whose body contains the "not authorized" message is reported as 409 Conflict; every other status code is returned unchanged.
+ ///
+ /// Response message from assetdelivery
+ /// Status code
+ private static HttpStatusCode GetAssetDeliveryStatusCode(HttpResponseMessage responseMessage)
+ {
+ switch (responseMessage.StatusCode)
+ {
+ case HttpStatusCode.Forbidden:
+ // roblox updated assetdelivery to return 403 for copylocked assets
+ // originally, it returned a 409
+ // this breaks a bunch of existing logic, so we have to check the body as well
+ string responseContent = responseMessage.Content.ReadAsStringAsync().Result; // NOTE(review): .Result blocks synchronously on the body read - consider an async variant
+ bool isConflict = responseContent.Contains("User is not authorized to access Asset."); // this should suffice for now
+ return isConflict ? HttpStatusCode.Conflict : HttpStatusCode.Forbidden;
+
+ default:
+ return responseMessage.StatusCode;
+ }
+ }
+
public struct AssetDeliveryInformation
{
public bool Success;
@@ -203,12 +226,13 @@ public struct AssetDeliveryInformation
public async Task GetAssetDeliveryInformation(HttpClient httpClient)
{
HttpResponseMessage response = await AssetRequest(httpClient);
+ HttpStatusCode statusCode = GetAssetDeliveryStatusCode(response);
- if (response.StatusCode == HttpStatusCode.Conflict)
+ if (statusCode == HttpStatusCode.Conflict)
return new AssetDeliveryInformation { Success = false, Error = "Insufficient permissions to download asset" };
- if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download
- return new AssetDeliveryInformation { Success = false, Error = $"Unhandled status code ({(int)response.StatusCode})" };
+ if (!IsSuccessStatusCode(statusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download
+ return new AssetDeliveryInformation { Success = false, Error = $"Unhandled status code ({(int)statusCode})" };
IEnumerable? values;
int versions;
@@ -251,8 +275,9 @@ public async Task GetAssetDeliveryInformation(HttpClie
try
{
HttpResponseMessage response = await AssetRequest(httpClient, version);
+ HttpStatusCode statusCode = GetAssetDeliveryStatusCode(response);
- switch (response.StatusCode)
+ switch (statusCode)
{
case HttpStatusCode.Conflict:
return (false, "Insufficient permissions to download asset", "");
@@ -264,8 +289,8 @@ public async Task GetAssetDeliveryInformation(HttpClie
return (false, "Too many requests", "");
}
- if (!IsSuccessStatusCode(response.StatusCode, allowForbidden: true)) // 403 means that the latest version is deleted but can still download
- return (false, $"Unhandled status code ({(int)response.StatusCode}) ({await response.Content.ReadAsStringAsync()})", "");
+ if (!IsSuccessStatusCode(statusCode))
+ return (false, $"Unhandled status code ({(int)statusCode}) ({await response.Content.ReadAsStringAsync()})", "");
if (!response.Headers.TryGetValues("Location", out IEnumerable? values))
return (false, "Location header is missing", ""); // this should never happen, but handle anyways
From 5cc049062fe8140c19e58f20e9fd7b0499a55b4a Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sat, 11 Oct 2025 21:49:37 +0100
Subject: [PATCH 23/26] exit early in single asset download for setup errors
---
RobloxUltimateScraper/Program.cs | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index ba69648..ad06f3b 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -181,7 +181,12 @@ static void RunAssetScraper()
outputDirectory = Config.Default.OutputDirectory;
Scraper scraper = new Scraper(assetId, outputDirectory);
- scraper.Setup(Http.Client).Wait();
+ var setupResult = scraper.Setup(Http.Client).Result;
+ if (!setupResult.Success)
+ {
+ Console.WriteLine(setupResult.Message);
+ return;
+ }
Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!");
From 2a44ecf59281b453b88c9eb6d1d6c9ed859660a0 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sat, 11 Oct 2025 21:58:31 +0100
Subject: [PATCH 24/26] asset type whitelist
---
RobloxUltimateScraper/CommandLineConfig.cs | 10 ++++++++++
RobloxUltimateScraper/Config.cs | 7 +++++++
RobloxUltimateScraper/Program.cs | 6 ++++++
RobloxUltimateScraper/Properties/launchSettings.json | 2 +-
RobloxUltimateScraper/RangeScraper.cs | 8 ++++++++
5 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/CommandLineConfig.cs b/RobloxUltimateScraper/CommandLineConfig.cs
index 3429fb7..c2748e2 100644
--- a/RobloxUltimateScraper/CommandLineConfig.cs
+++ b/RobloxUltimateScraper/CommandLineConfig.cs
@@ -235,5 +235,15 @@ public string BaseUrl
// hack... again!!!
[Option("mds", Required = false, Hidden = true)]
public MultipleDownloadStructureType? MultipleDownloadStructureOtherName { get; set; }
+
+ ///
+ /// Only download assets with the given asset type.
+ ///
+ [Option("expectedassettype", Required = false, Default = null, HelpText = "Only download assets with the given asset type. Other name: --eat")]
+ public AssetType? ExpectedAssetType { get; set; }
+
+ // hack... again!!!
+ [Option("eat", Required = false, Hidden = true)]
+ public AssetType? ExpectedAssetTypeOtherName { get; set; }
}
}
diff --git a/RobloxUltimateScraper/Config.cs b/RobloxUltimateScraper/Config.cs
index da269ed..8dba540 100644
--- a/RobloxUltimateScraper/Config.cs
+++ b/RobloxUltimateScraper/Config.cs
@@ -51,6 +51,9 @@ internal class Config
///
public MultipleDownloadStructureType MultipleDownloadStructure { get; }
+ ///
+ public AssetType? ExpectedAssetType { get; }
+
public Config(CommandLineConfig config)
{
_clConfig = config;
@@ -58,6 +61,10 @@ public Config(CommandLineConfig config)
CompressionLevel = _clConfig.CompressionLevelArgOtherName != null ? (int)_clConfig.CompressionLevelArgOtherName : _clConfig.CompressionLevelArg;
SingleHttpClient = _clConfig.SingleHttpClientOtherName ?? _clConfig.SingleHttpClient;
MultipleDownloadStructure = _clConfig.MultipleDownloadStructureOtherName ?? _clConfig.MultipleDownloadStructure;
+ ExpectedAssetType = _clConfig.ExpectedAssetTypeOtherName ?? _clConfig.ExpectedAssetType;
+
+ if (ExpectedAssetType == AssetType.Unknown)
+ throw new ApplicationException("Invalid value for ExpectedAssetType");
}
public static void Initialise(CommandLineConfig commandLineConfig)
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index ad06f3b..e0948f5 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -188,6 +188,12 @@ static void RunAssetScraper()
return;
}
+ if (Config.Default.ExpectedAssetType.HasValue && scraper.AssetType != Config.Default.ExpectedAssetType.Value)
+ {
+ Console.WriteLine($"Asset {assetId}'s type is not whitelisted (expected {Config.Default.ExpectedAssetType})");
+ return;
+ }
+
Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!");
// set up titles
diff --git a/RobloxUltimateScraper/Properties/launchSettings.json b/RobloxUltimateScraper/Properties/launchSettings.json
index 7620829..ec5a221 100644
--- a/RobloxUltimateScraper/Properties/launchSettings.json
+++ b/RobloxUltimateScraper/Properties/launchSettings.json
@@ -9,7 +9,7 @@
},
"RobloxUltimateScraper - Range Scraper": {
"commandName": "Project",
- "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All\r\n--mds Separated"
+ "commandLineArgs": "-w 5\r\n-c Zstd\r\n--cl 9\r\n-o Both\r\n-r 1000000-1000100\r\n-i All\r\n--mds Separated\r\n--eat Image"
}
}
}
\ No newline at end of file
diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs
index eea9a5f..27f5cb0 100644
--- a/RobloxUltimateScraper/RangeScraper.cs
+++ b/RobloxUltimateScraper/RangeScraper.cs
@@ -66,6 +66,14 @@ public async Task StartWorker(HttpClient httpClient)
OnAssetError?.Invoke();
continue;
}
+
+ if (Config.Default.ExpectedAssetType.HasValue && scraper.AssetType != Config.Default.ExpectedAssetType.Value)
+ {
+ Console.WriteLine($"Asset {id}'s type is not whitelisted (expected {Config.Default.ExpectedAssetType})");
+ OnAssetFinished?.Invoke();
+ continue;
+ }
+
OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions);
scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success);
From a2e8fe4fc69ecbef035cbeb80988d792ada4d498 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sat, 11 Oct 2025 22:11:45 +0100
Subject: [PATCH 25/26] fix title bar version counter using id count instead of
version count
---
RobloxUltimateScraper/Program.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index e0948f5..e6d4875 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -233,7 +233,7 @@ static async Task RangeScraperTitleLogic(RangeScraperData data, CancellationToke
{
Console.Title = $"RobloxUltimateScraper | Range {data.StartRange}-{data.EndRange} | " +
$"{data.DownloadedIds}/{data.TotalIds} IDs ({data.ErrorIds} errors) | " +
- $"{data.DownloadedIds}/{data.TotalVersions} Versions ({data.ErrorVersions} errors)";
+ $"{data.DownloadedVersions}/{data.TotalVersions} Versions ({data.ErrorVersions} errors)";
try
{
From c91348c2472655ea1840ffe0eb3161457ac0ded7 Mon Sep 17 00:00:00 2001
From: bluepilledgreat <97983689+bluepilledgreat@users.noreply.github.com>
Date: Sat, 11 Oct 2025 22:14:33 +0100
Subject: [PATCH 26/26] fix empty directories being created with range scraper
---
RobloxUltimateScraper/Program.cs | 2 ++
RobloxUltimateScraper/RangeScraper.cs | 1 +
RobloxUltimateScraper/Scraper.cs | 10 ++++++++--
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/RobloxUltimateScraper/Program.cs b/RobloxUltimateScraper/Program.cs
index e6d4875..e5eb149 100644
--- a/RobloxUltimateScraper/Program.cs
+++ b/RobloxUltimateScraper/Program.cs
@@ -194,6 +194,8 @@ static void RunAssetScraper()
return;
}
+ scraper.CreateOutputDirectories();
+
Console.WriteLine($"Asset {assetId} has {scraper.TotalVersions} versions!");
// set up titles
diff --git a/RobloxUltimateScraper/RangeScraper.cs b/RobloxUltimateScraper/RangeScraper.cs
index 27f5cb0..6d86ca9 100644
--- a/RobloxUltimateScraper/RangeScraper.cs
+++ b/RobloxUltimateScraper/RangeScraper.cs
@@ -74,6 +74,7 @@ public async Task StartWorker(HttpClient httpClient)
continue;
}
+ scraper.CreateOutputDirectories();
OnAssetVersionsDiscovered?.Invoke(scraper.TotalVersions);
scraper.OnDownloadFinished += (bool success) => OnAssetDownloadFinished?.Invoke(success);
diff --git a/RobloxUltimateScraper/Scraper.cs b/RobloxUltimateScraper/Scraper.cs
index a23a230..8aa269d 100644
--- a/RobloxUltimateScraper/Scraper.cs
+++ b/RobloxUltimateScraper/Scraper.cs
@@ -142,6 +142,14 @@ public async Task Setup(HttpClient httpClient)
FileExtension = Config.Default.OutputExtension == "Auto" ? assetDeliveryInfo.AssetType.GetExtension() : Config.Default.OutputExtension;
+ return new SetupResult { Success = true };
+ }
+
+ ///
+ /// Creates all the necessary output directories
+ ///
+ public void CreateOutputDirectories()
+ {
// create all the directories we need
Directory.CreateDirectory(OutputDirectory);
@@ -150,8 +158,6 @@ public async Task Setup(HttpClient httpClient)
if (!string.IsNullOrEmpty(FilesOutputDirectory))
Directory.CreateDirectory(FilesOutputDirectory);
-
- return new SetupResult { Success = true };
}
///