forked from blastron/PartyBot
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapePlaylist.py
More file actions
43 lines (33 loc) · 1.26 KB
/
scrapePlaylist.py
File metadata and controls
43 lines (33 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from bs4 import BeautifulSoup
import urllib2
import re
from playlist import Track, Playlist
# Site root; parse_entry prepends it to each track's relative file path.
base_url = 'http://compo.thasauce.net'
def parse_playlist(playlist_html):
    """Build a Playlist from the HTML of a playlist page.

    Every ``<div class="item">`` element found in ``playlist_html`` is
    converted with ``parse_entry`` and the results are added, in the order
    ``find_all`` returned them, to a freshly created ``Playlist``.

    Returns the populated ``Playlist``.
    """
    # NOTE(review): no parser is named, so bs4 uses whichever one happens to
    # be installed -- parse results may differ between environments.
    document = BeautifulSoup(playlist_html)
    result = Playlist()

    item_divs = document.find_all('div', {'class': 'item'})

    # All entries are parsed before any of them is added to the playlist.
    tracks = [parse_entry(item_div) for item_div in item_divs]
    for track in tracks:
        result.AddTrack(track)

    return result
def parse_entry(entry):
    """Convert one playlist item element into a Track.

    ``entry`` is a parsed item element. The song id, title and author are
    read from the ``data-id``, ``data-title`` and ``data-author`` attributes
    of the first ``<div>`` inside it, and the file path from the
    ``data-file`` attribute of the first ``<a>`` inside it. The file path is
    appended to ``base_url`` to form the song URL.

    The description is the text of the ``<div class="item_desc">`` block;
    if the entry has no such block the description is an empty string
    instead of the lookup failing.

    Returns a ``Track(id, title, artist, url, description)``.
    Raises ``KeyError`` if one of the required ``data-*`` attributes is
    missing.
    """
    # Grab some details from data attributes of the item and its children.
    details = entry.div
    song_id = details["data-id"]
    song_title = details["data-title"]
    song_artist = details["data-author"]

    relative_url = entry.a["data-file"]
    song_url = base_url + relative_url

    # find() returns None when nothing matches, so guard before get_text()
    # rather than letting one description-less entry abort the whole parse.
    item_desc = entry.find('div', {'class': 'item_desc'})
    if item_desc is None:
        song_description = u''
    else:
        song_description = item_desc.get_text()

    return Track(song_id, song_title, song_artist, song_url, song_description)
if __name__ == "__main__":
    # Script entry point: obtain a playlist via scrapePlaylist("OHC220"),
    # print every track's fields, then print the number of tracks.
    # NOTE(review): ``scrapePlaylist`` is neither defined nor imported in the
    # visible part of this file, so this call raises NameError as written --
    # confirm where it is supposed to come from.
    playlist = scrapePlaylist("OHC220")

    separator = "----------"
    for track in playlist.tracks:
        print(separator)
        for field in ("id", "title", "artist", "url", "description"):
            print(getattr(track, field))

    print(separator)
    print("%i songs total." % (len(playlist.tracks)))