From 6d2ff800cba82aa5dbb5971786184b522e5f1b2b Mon Sep 17 00:00:00 2001 From: Aleksei Sotnikov Date: Tue, 2 Jun 2026 01:03:41 +0700 Subject: [PATCH] add SOCKS5 proxy support (-p/--proxy flag) Adds a new --proxy CLI option that accepts socks5://host:port and routes HTTPS traffic through a SOCKS5 proxy. On the JVM, the proxy path uses a raw Socket(Proxy) + SSLSocket wrapper rather than hato, because the JDK HttpClient silently drops SOCKS proxies from a ProxySelector. Tracked as OpenJDK JDK-8214516 (Open, P4, since 2018, no fix planned). Raw Socket(Proxy) bypasses this because it talks HTTP/1.1 directly. On babashka, the proxy string is passed through to the underlying http-client. Native build is wired up with new reflection config and the socks URL protocol. Includes: - new SOCKS5 HTTP/1.1 client in src/r11y/lib/http.cljc with proper chunked transfer-encoding, gzip/deflate decoding, and SSL handshake - new --proxy flag in src/r11y/core.clj, threaded through extract-content-from-url - graal-config/reflect-config.json: add Proxy, Proxy$Type, ProxySelector, InetSocketAddress, SSLSocket, SSLSocketFactory, SSLContext - build-native.sh: add socks to --enable-url-protocols - README: document the --proxy option - new tests: chunked decoder, gunzip, maybe-decode, CLI parse --- README.md | 5 + build-native.sh | 2 +- graal-config/reflect-config.json | 58 +++++++ src/r11y/core.clj | 97 +++++------ src/r11y/lib/html.clj | 39 +++-- src/r11y/lib/http.cljc | 266 ++++++++++++++++++++++++++++++- test/r11y/core_test.clj | 49 ++++++ test/r11y/lib/http_test.cljc | 67 ++++++++ 8 files changed, 517 insertions(+), 66 deletions(-) create mode 100644 test/r11y/core_test.clj create mode 100644 test/r11y/lib/http_test.cljc diff --git a/README.md b/README.md index 5b4f8c8..9963e38 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ A lightning fast, GraalVM-compiled CLI tool for extracting readable content from - Removes decorative SVGs, spacer images, layout tables, and duplicated UI 
chrome - GitHub-optimized extraction (README files, blob content) - Configurable link density threshold for content filtering +- SOCKS5 proxy support (`--proxy`) for accessing geo-blocked or firewalled content, with chunked transfer-encoding handled correctly - Babashka-compatible — usable from `bb` scripts via `:git/tag` deps, no GraalVM required - Fast startup with GraalVM native compilation (~40ms) @@ -108,6 +109,9 @@ r11y --link-density 0.3 https://example.com # GitHub blob URLs (automatically fetches raw content with metadata) r11y -m https://github.com/user/repo/blob/main/README.md +# Route traffic through a SOCKS5 proxy +r11y --proxy socks5://127.0.0.1:9050 https://example.com + # Show help r11y --help ``` @@ -116,6 +120,7 @@ r11y --help - `-m, --with-metadata` - Include YAML frontmatter with metadata (title, author, date, description, etc.) - `-l, --link-density N` - Link density threshold 0-1 (default: 0.5). Lower values are more aggressive at filtering link-heavy content. +- `-p, --proxy URL` - SOCKS5 proxy URL with host:port (e.g. `socks5://127.0.0.1:9050`). Useful for accessing geo-blocked or firewalled content. 
- `-v, --version` - Show version - `-h, --help` - Show help message diff --git a/build-native.sh b/build-native.sh index 257de18..4e9f223 100755 --- a/build-native.sh +++ b/build-native.sh @@ -35,7 +35,7 @@ $NATIVE_IMAGE \ --features=clj_easy.graal_build_time.InitClojureClasses \ --no-fallback \ --report-unsupported-elements-at-runtime \ - --enable-url-protocols=http,https \ + --enable-url-protocols=http,https,socks \ -H:ConfigurationFileDirectories=graal-config echo "" diff --git a/graal-config/reflect-config.json b/graal-config/reflect-config.json index 6a96d93..c7d1d78 100644 --- a/graal-config/reflect-config.json +++ b/graal-config/reflect-config.json @@ -25,6 +25,64 @@ "allPublicMethods": true, "allPublicConstructors": true }, + { + "name": "java.net.Proxy", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "java.net.Proxy$Type", + "allPublicMethods": true, + "allPublicFields": true + }, + { + "name": "java.net.ProxySelector", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "java.net.InetSocketAddress", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "javax.net.ssl.SSLSocket", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "javax.net.ssl.SSLSocketFactory", + "allPublicMethods": true + }, + { + "name": "javax.net.ssl.SSLSocketFactoryImpl", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "sun.security.ssl.SSLSocketFactoryImpl", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "sun.security.ssl.SSLSocketImpl", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "javax.net.ssl.SSLParameters", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "javax.net.ssl.SSLContext", + "allPublicMethods": true, + "allPublicConstructors": true + }, + { + "name": "javax.net.ssl.SSLContextSpi", + "allPublicMethods": true + }, { "name": 
(defn- proxy-arg-error
  "Return a human-readable error message when `value` cannot be used as the
  argument of --proxy, or nil when it looks like a usable SOCKS proxy URL
  (scheme socks/socks5/socks5h and a non-blank host)."
  [^String value]
  (if (str/starts-with? value "-")
    ;; `-p -m URL` must not swallow the next flag as the proxy address.
    "Missing value for --proxy"
    (try
      (let [uri    (java.net.URI. value)
            scheme (some-> (.getScheme uri) str/lower-case)]
        (cond
          (not (contains? #{"socks" "socks5" "socks5h"} scheme))
          (str "Proxy must be a socks5://host:port URL, got: " value)

          (str/blank? (.getHost uri))
          (str "Proxy URL is missing a host: " value)))
      (catch java.net.URISyntaxException _
        (str "Invalid proxy URL: " value)))))

(defn parse-args
  "Parse the command-line `args` (a sequence of strings) into an options map.

  The result always carries :link-density-threshold, :with-metadata, :proxy
  and :url. Parsing stops early and adds :help, :version or :error (a message
  string) when the corresponding flag or a problem is encountered.

  The --proxy value is validated here, the same way --link-density is, so a
  malformed proxy is reported as a usage error before any request is made."
  [args]
  (loop [args args
         opts {:link-density-threshold 0.5 :with-metadata false :proxy nil :url nil}]
    (if (empty? args)
      opts
      (let [arg (first args)
            rest-args (rest args)]
        (cond
          (or (= arg "-h") (= arg "--help")) (assoc opts :help true)
          (or (= arg "-v") (= arg "--version")) (assoc opts :version true)
          (or (= arg "-m") (= arg "--with-metadata")) (recur rest-args (assoc opts :with-metadata true))
          (or (= arg "-p") (= arg "--proxy"))
          (if (empty? rest-args)
            (assoc opts :error "Missing value for --proxy")
            (let [value (first rest-args)]
              (if-let [problem (proxy-arg-error value)]
                (assoc opts :error problem)
                (recur (rest rest-args) (assoc opts :proxy value)))))
          (or (= arg "-l") (= arg "--link-density"))
          (if (empty? rest-args)
            (assoc opts :error "Missing value for --link-density")
            (let [val (first rest-args)
                  parse-result (try (let [n (Double/parseDouble val)]
                                      (if (and (>= n 0.0) (<= n 1.0))
                                        {:success true :value n}
                                        {:success false :error (str "Link density must be between 0 and 1, got: " val)}))
                                    (catch NumberFormatException _
                                      {:success false :error (str "Invalid number for --link-density: " val)}))]
              (if (:success parse-result)
                (recur (rest rest-args) (assoc opts :link-density-threshold (:value parse-result)))
                (assoc opts :error (:error parse-result)))))
          ;; Any other dash-prefixed token is an option we do not know.
          (str/starts-with? arg "-") (assoc opts :error (str "Unknown option: " arg))
          ;; First bare token is the URL; a second one is an error.
          :else (if (:url opts)
                  (assoc opts :error "Multiple URLs provided. Only one URL is allowed.")
                  (recur rest-args (assoc opts :url arg))))))))
(:url opts)) (do - (println "Error: No URL provided") - (println) - (print-usage) - (System/exit 1)) - :else (try (let [result (html/extract-content-from-url (:url opts) - :format :markdown - :link-density-threshold - (:link-density-threshold opts) - :with-metadata (:with-metadata opts))] - (println (:markdown result)) - (System/exit 0)) - (catch Exception e - (binding [*out* *err*] - (println "Error extracting content:" (.getMessage e))) - (System/exit 1)))))) + (println "https://github.com/dazld/r11y/releases") + (System/exit 0)) + (:help opts) (do (print-usage) (System/exit 0)) + (:error opts) (do + (println "Error:" (:error opts)) + (println) + (print-usage) + (System/exit 1)) + (nil? (:url opts)) (do + (println "Error: No URL provided") + (println) + (print-usage) + (System/exit 1)) + :else (try (let [result (html/extract-content-from-url (:url opts) + :format :markdown + :link-density-threshold + (:link-density-threshold opts) + :with-metadata (:with-metadata opts) + :proxy (:proxy opts))] + (println (:markdown result)) + (System/exit 0)) + (catch Exception e + (binding [*out* *err*] + (println "Error extracting content:" (.getMessage e))) + (System/exit 1)))))) diff --git a/src/r11y/lib/html.clj b/src/r11y/lib/html.clj index 4a6469f..ce71a8a 100644 --- a/src/r11y/lib/html.clj +++ b/src/r11y/lib/html.clj @@ -807,18 +807,25 @@ (re-find #"(?m)^[-*+]\s+\S" s) (re-find #"(?m)^\d+\.\s+\S" s))))) +(def ^:const default-fetch-headers + "Default request identity for content extraction. The User-Agent + and Sec-Fetch-* headers make servers serve the same content they + would serve to a Safari user, which is the common case r11y is + built to read." 
(defn- fetch-url
  "Fetch `url` with `default-fetch-headers`, requesting the body as a byte
  array, and return the response map produced by `http/get-url`.

  The optional second argument is an options map; when it contains a truthy
  :proxy, that value is forwarded to `http/get-url` under :proxy.

  The single-argument arity is kept so that call sites written against the
  earlier `(fetch-url url)` signature keep working."
  ([url]
   (fetch-url url nil))
  ([url {:keys [proxy]}]
   (http/get-url url (cond-> {:as :byte-array :headers default-fetch-headers}
                       proxy (assoc :proxy proxy)))))
(defn- socks5-socket
  "Open a TCP connection to the host/port of `target-url`, routed through the
  SOCKS proxy at `proxy-host`:`proxy-port`, and return the connected socket.

  When the target scheme is https the socket is wrapped in an SSLSocket and
  the TLS handshake is completed before returning.

  `connect-timeout-ms` is passed to Socket.connect as the connect timeout.

  Throws ex-info when the URL has no host; any failure while connecting or
  handshaking closes the underlying socket before the exception propagates."
  [^String target-url ^String proxy-host ^long proxy-port
   connect-timeout-ms]
  (let [target-uri  (URI. target-url)
        target-host (.getHost target-uri)
        _           (when (or (nil? target-host) (zero? (count target-host)))
                      (throw (ex-info "Invalid target URL: missing host"
                                      {:url target-url})))
        secure?     (= "https" (.getScheme target-uri))
        ;; Explicit port wins; otherwise 443 for https and 80 for anything else.
        target-port (int (if (pos? (.getPort target-uri))
                           (.getPort target-uri)
                           (if secure? 443 80)))
        proxy       (Proxy. Proxy$Type/SOCKS
                            (InetSocketAddress. proxy-host (int proxy-port)))
        sock        (Socket. proxy)]
    (try
      ;; An unresolved address makes the JDK send the host *name* to the
      ;; proxy, so DNS resolution happens on the proxy side instead of
      ;; locally (no DNS lookup outside the tunnel).
      (.connect sock
                (InetSocketAddress/createUnresolved target-host target-port)
                (int connect-timeout-ms))
      (if secure?
        (let [^SSLSocket ssl-sock (.createSocket
                                   (.getSocketFactory (SSLContext/getDefault))
                                   sock target-host target-port true)
              ^javax.net.ssl.SSLParameters params (.getSSLParameters ssl-sock)]
          ;; A bare SSLSocket validates the certificate chain but does not
          ;; compare the certificate against the host name unless an endpoint
          ;; identification algorithm is set.
          (.setEndpointIdentificationAlgorithm params "HTTPS")
          (.setSSLParameters ssl-sock params)
          (.startHandshake ssl-sock)
          ssl-sock)
        sock)
      (catch Throwable t
        ;; Do not leak the file descriptor when connect/handshake fails.
        (try (.close sock) (catch Exception _ nil))
        (throw t)))))
(defn- inflate
  "Inflate a deflate-encoded byte array and return the decoded bytes.

  The zlib-wrapped form (header + Adler-32 trailer) is tried first because it
  is self-validating; if that throws, the input is retried as a raw deflate
  stream. Throws whatever the second attempt throws when neither form
  decodes."
  [^bytes body]
  (let [try-inflate
        ;; NB: the Inflater constructor flag is `nowrap` — true means RAW
        ;; deflate (no zlib header), false means zlib-wrapped.
        (fn [nowrap?]
          (let [inflater (java.util.zip.Inflater. (boolean nowrap?))
                out      (ByteArrayOutputStream.)
                buf      (byte-array 8192)]
            (try
              (with-open [in (java.util.zip.InflaterInputStream.
                              (java.io.ByteArrayInputStream. body)
                              inflater)]
                (loop []
                  (let [n (.read in buf)]
                    (when (pos? n)
                      (.write out buf 0 n)
                      (recur)))))
              (.toByteArray out)
              (finally
                ;; Closing the stream does not end an Inflater that was
                ;; passed in explicitly, so release it here.
                (.end inflater)))))]
    (try (try-inflate false)
         (catch Exception _
           (try-inflate true)))))
(defn- socks5-get
  "Issue an HTTP(S) GET for `url` through the SOCKS proxy described by
  `proxy-str`, speaking HTTP/1.1 over a manually connected socket.

  `headers` is a map of request header name -> value; Host and Connection
  are always overwritten. `timeout-ms` is handed to `socks5-socket`.

  Returns {:status int, :headers map, :body bytes} where the body has been
  de-chunked and content-decoded. Throws IOException on an empty or
  malformed response or a truncated body. The socket is always closed."
  [url proxy-str headers timeout-ms]
  (let [[proxy-host proxy-port] (parse-proxy-uri proxy-str)
        target-uri  (URI. url)
        target-host (.getHost target-uri)
        port        (.getPort target-uri)
        ;; Keep an explicit port in Host so servers on non-default ports
        ;; see the authority the URL actually names.
        host-header (if (pos? port) (str target-host ":" port) target-host)
        ;; getRawPath is \"\" (not nil) for URLs like https://example.com;
        ;; the request-target must then be \"/\".
        raw-path    (.getRawPath target-uri)
        path        (str (if (str/blank? raw-path) "/" raw-path)
                         (when-let [q (.getRawQuery target-uri)]
                           (str "?" q)))
        request-lines (cons (str "GET " path " HTTP/1.1")
                            (map (fn [[k v]] (str k ": " v))
                                 (assoc headers
                                        "Host" host-header
                                        "Connection" "close")))
        request     (str (str/join "\r\n" request-lines) "\r\n\r\n")]
    (with-open [^Socket sock (socks5-socket url proxy-host proxy-port timeout-ms)]
      (let [^InputStream sock-in (.getInputStream sock)
            out (OutputStreamWriter. (.getOutputStream sock) "UTF-8")]
        (.write out ^String request)
        (.flush out)
        (let [status-line (read-line-raw sock-in)
              _ (when (nil? status-line)
                  (throw (IOException. "Empty response from server")))
              ;; First three-digit token of the status line is the code.
              status (or (some #(when (re-matches #"\d{3}" %)
                                  (Integer/parseInt %))
                               (str/split status-line #" "))
                         (throw (IOException. (str "Bad status line: " status-line))))
              resp-headers (read-headers-raw sock-in)
              ;; Transfer-Encoding values are case-insensitive and may be a
              ;; list (e.g. \"gzip, chunked\").
              chunked? (some-> (get resp-headers :transfer-encoding)
                               str/lower-case
                               (str/includes? "chunked"))
              body (cond
                     chunked?
                     (read-chunked sock-in)

                     (get resp-headers :content-length)
                     (let [n   (Integer/parseInt (str/trim (get resp-headers :content-length)))
                           arr (byte-array n)]
                       ;; read() may return fewer bytes than asked for, so
                       ;; keep going until the declared length is filled.
                       (loop [off 0]
                         (when (< off n)
                           (let [r (.read sock-in arr off (- n off))]
                             (when (neg? r)
                               (throw (IOException. "EOF before Content-Length")))
                             (recur (+ off r)))))
                       arr)

                     ;; Connection: close was requested, so EOF ends the body.
                     :else
                     (drain-stream sock-in))
              body (maybe-decode body resp-headers)]
          {:status status
           :headers resp-headers
           :body body})))))
(defn get-url
  "Perform an HTTP GET for `url` and return the response map.

  `opts` is passed through `normalize-opts`. When it carries a :proxy string
  that `proxy->opts` turns into a SOCKS5 marker, the request is made by
  `socks5-get`; the body is returned as bytes, or as a UTF-8 string when
  :as is :string. Otherwise the request goes to the platform HTTP client and
  the response headers are keywordized.

  :__socks5_timeout__ overrides `default-socks5-timeout-ms` on the SOCKS path.

  Throws ex-info when a :proxy string was supplied but could not be
  interpreted, rather than continuing on the non-SOCKS path."
  [url opts]
  (let [opts       (normalize-opts opts)
        proxy-str  (:proxy opts)
        proxy-opts (proxy->opts proxy-str)
        ;; The caller explicitly asked for a proxy. If it cannot be parsed,
        ;; fail loudly: falling through to the regular client path would make
        ;; the request without the SOCKS tunnel the caller requested.
        _          (when (and (string? proxy-str) (nil? proxy-opts))
                     (throw (ex-info (str "Invalid proxy URL: " proxy-str)
                                     {:proxy proxy-str})))
        opts       (merge opts proxy-opts)
        timeout-ms (or (:__socks5_timeout__ opts) default-socks5-timeout-ms)]
    (if (socks5? opts)
      ;; NOTE(review): babashka matches both :bb and :clj and takes the first
      ;; matching branch, so with :clj listed first the :bb branch below is
      ;; presumably never selected under bb — confirm the intended order.
      #?(:clj
         (let [proxy-str (second (:__socks5__ opts))
               headers (or (:headers opts) {})
               resp (socks5-get url proxy-str headers timeout-ms)
               body (if (= :string (:as opts))
                      (String. ^bytes (:body resp) "UTF-8")
                      (:body resp))]
           (assoc resp :body body))
         :bb (throw (ex-info "SOCKS5 proxy is not supported on babashka"
                             {:proxy (:proxy opts)})))
      #?(:bb (-> (http/get url opts)
                 (update :headers keywordize-headers))
         :clj (-> (hato/get url opts)
                  (update :headers keywordize-headers))))))
(:error (core/parse-args ["-l" "1.5" "https://example.com"])))))) + +(deftest parse-args-proxy + (testing "-p captures proxy string" + (is (= "socks5://127.0.0.1:9050" + (:proxy (core/parse-args ["-p" "socks5://127.0.0.1:9050" "https://example.com"]))))) + (testing "--proxy (long form) captures proxy string" + (is (= "socks5://10.0.1.23:9090" + (:proxy (core/parse-args ["--proxy" "socks5://10.0.1.23:9090" "https://example.com"]))))) + (testing "Proxy combined with other flags" + (is (= {:link-density-threshold 0.3 + :with-metadata true + :proxy "socks5://127.0.0.1:9050" + :url "https://example.com"} + (core/parse-args ["-m" "-l" "0.3" "-p" "socks5://127.0.0.1:9050" "https://example.com"])))) + (testing "Proxy flag without value returns error" + (is (some? (:error (core/parse-args ["-p"])))))) + +(deftest parse-args-help-and-version + (testing "-h sets help flag" + (is (true? (:help (core/parse-args ["-h"]))))) + (testing "-v sets version flag" + (is (true? (:version (core/parse-args ["-v"])))))) + +(deftest parse-args-errors + (testing "Multiple URLs return error" + (is (some? (:error (core/parse-args ["https://a.com" "https://b.com"]))))) + (testing "Unknown option returns error" + (is (some? (:error (core/parse-args ["--foo" "https://example.com"])))))) diff --git a/test/r11y/lib/http_test.cljc b/test/r11y/lib/http_test.cljc new file mode 100644 index 0000000..dbfe01d --- /dev/null +++ b/test/r11y/lib/http_test.cljc @@ -0,0 +1,67 @@ +(ns r11y.lib.http-test + (:require [clojure.test :refer [deftest is testing]] + [r11y.lib.http :as http]) + #?(:clj (:import [java.net ProxySelector Proxy Proxy$Type InetSocketAddress Socket URI]))) + +(deftest proxy->opts-nil + (testing "nil proxy returns nil" + (is (nil? (http/proxy->opts nil))))) + +(deftest proxy->opts-socks5 + (testing "SOCKS5 URL is recognised and produces a marker opts map" + #?(:clj + (let [opts (http/proxy->opts "socks5://127.0.0.1:9050")] + (is (map? opts)) + (is (contains? 
#?(:clj
   (deftest read-chunked-handles-trailing-headers
     ;; Covers what follows the zero-length end chunk: the bare terminating
     ;; CRLF, and an actual trailer field (which the test name promises).
     (testing "end chunk followed by trailing CRLF is consumed correctly"
       (let [payload "3\r\nfoo\r\n0\r\n\r\n"
             in (java.io.ByteArrayInputStream. (.getBytes payload "UTF-8"))
             out (#'http/read-chunked in)]
         (is (= "foo" (String. ^bytes out "UTF-8")))))
     (testing "a trailer field after the end chunk does not leak into the body"
       (let [payload "3\r\nfoo\r\n0\r\nX-Checksum: abc\r\n\r\n"
             in (java.io.ByteArrayInputStream. (.getBytes payload "UTF-8"))
             out (#'http/read-chunked in)]
         (is (= "foo" (String. ^bytes out "UTF-8")))))))