-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfeedship.py
More file actions
executable file
·170 lines (145 loc) · 6.64 KB
/
feedship.py
File metadata and controls
executable file
·170 lines (145 loc) · 6.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
feedship - RSS to YouTube URL converter for Newsboat
Author: David Grindle
Year: 2025
License: MIT
"""
import requests
import re
import sys
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
def debug_print(message, level="INFO"):
"""Print debug messages with different levels"""
colors = {
"INFO": "\033[94m", # Blue
"SUCCESS": "\033[92m", # Green
"WARNING": "\033[93m", # Yellow
"ERROR": "\033[91m", # Red
"RESET": "\033[0m" # Reset
}
print(f"{colors.get(level, colors['RESET'])}[{level}] {message}{colors['RESET']}")
def get_youtube_rss(channel_url):
"""
Extract RSS feed URL from a YouTube channel URL with detailed debugging
"""
debug_print(f"Starting RSS extraction for: {channel_url}")
# Validate URL format
if not channel_url.startswith(('http://', 'https://')):
channel_url = 'https://' + channel_url
debug_print(f"Added https:// prefix: {channel_url}", "WARNING")
if 'youtube.com' not in channel_url and 'youtu.be' not in channel_url:
debug_print("URL doesn't appear to be a YouTube URL", "ERROR")
return None
try:
debug_print("Making request to YouTube...")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(channel_url, headers=headers, timeout=15)
debug_print(f"Received response: HTTP {response.status_code}")
if response.status_code != 200:
debug_print(f"Failed to retrieve page. Status code: {response.status_code}", "ERROR")
return None
debug_print("Parsing HTML content...")
soup = BeautifulSoup(response.text, 'html.parser')
# Look for channel ID in multiple potential locations
channel_id = None
debug_print("Searching for channel ID...")
# Method 1: Look for meta tags
meta_tag = soup.find('meta', {'property': 'channelId'}) or soup.find('meta', {'itemprop': 'channelId'})
if meta_tag and meta_tag.get('content'):
channel_id = meta_tag['content']
debug_print(f"Found channel ID in meta tag: {channel_id}", "SUCCESS")
# Method 2: Look for canonical link
if not channel_id:
canonical_link = soup.find('link', {'rel': 'canonical'})
if canonical_link and canonical_link.get('href'):
canonical_url = canonical_link['href']
if '/channel/' in canonical_url:
channel_id = canonical_url.split('/channel/')[-1].split('/')[0]
debug_print(f"Found channel ID in canonical URL: {channel_id}", "SUCCESS")
# Method 3: Look for JSON-LD data
if not channel_id:
json_ld = soup.find('script', {'type': 'application/ld+json'})
if json_ld:
debug_print("Found JSON-LD data, searching for channel ID...")
# This is a simplified approach - in reality, you'd need to parse the JSON
match = re.search(r'"channelId":"([^"]+)"', json_ld.string)
if match:
channel_id = match.group(1)
debug_print(f"Found channel ID in JSON-LD: {channel_id}", "SUCCESS")
# Method 4: Look for internal YouTube data
if not channel_id:
debug_print("Searching for internal YouTube data...")
match = re.search(r'"channelId":"([^"]+)"', response.text)
if match:
channel_id = match.group(1)
debug_print(f"Found channel ID in page text: {channel_id}", "SUCCESS")
if not channel_id:
debug_print("Could not find channel ID using any method", "ERROR")
# Try to extract from URL directly as last resort
if '/channel/' in channel_url:
channel_id = channel_url.split('/channel/')[-1].split('/')[0]
debug_print(f"Extracted channel ID from URL: {channel_id}", "WARNING")
else:
debug_print("No channel ID found. The channel might have restrictions.", "ERROR")
return None
# Construct RSS URL
rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
debug_print(f"Generated RSS URL: {rss_url}", "SUCCESS")
# Verify the RSS feed exists
debug_print("Verifying RSS feed...")
rss_response = requests.head(rss_url, timeout=10)
if rss_response.status_code == 200:
debug_print("RSS feed verified successfully!", "SUCCESS")
return rss_url
else:
debug_print(f"RSS feed returned status code: {rss_response.status_code}", "WARNING")
# Return the URL anyway as it might still work
return rss_url
except requests.exceptions.Timeout:
debug_print("Request timed out. The server might be slow or unresponsive.", "ERROR")
return None
except requests.exceptions.ConnectionError:
debug_print("Connection error. Please check your internet connection.", "ERROR")
return None
except requests.exceptions.RequestException as e:
debug_print(f"Request failed: {str(e)}", "ERROR")
return None
except Exception as e:
debug_print(f"Unexpected error: {str(e)}", "ERROR")
return None
def main():
"""Main function to run the YouTube RSS extractor"""
print("=" * 60)
print("YouTube Channel RSS Feed Extractor")
print("=" * 60)
if len(sys.argv) > 1:
# Use URL from command line argument
channel_url = sys.argv[1]
else:
# Prompt for URL
channel_url = input("Enter YouTube channel URL: ").strip()
if not channel_url:
debug_print("No URL provided. Exiting.", "ERROR")
sys.exit(1)
debug_print(f"Processing URL: {channel_url}")
rss_url = get_youtube_rss(channel_url)
print("\n" + "=" * 60)
if rss_url:
print(f"\n✅ RSS Feed URL: {rss_url}")
print("\nYou can use this RSS URL with any RSS reader to get updates")
print("from this YouTube channel.")
else:
print("\n❌ Failed to extract RSS URL.")
print("\nPossible reasons:")
print("- The channel might not exist or be unavailable")
print("- The channel might have restrictions")
print("- YouTube might have changed their page structure")
print("- There might be a network issue")
print("=" * 60)
if __name__ == "__main__":
main()