-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathword_freq_counter.py
More file actions
executable file
·42 lines (37 loc) · 1.21 KB
/
word_freq_counter.py
File metadata and controls
executable file
·42 lines (37 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import operator
import sys
from collections import Counter

import requests
from bs4 import BeautifulSoup
def start(url):
    """Fetch *url*, collect the words of its listing titles, and count them.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The text of every ``<a class="index_singleListingTitles">`` element is
    lower-cased and split on whitespace; all resulting words are gathered
    into one list, which is then handed to ``clean_up_list``.

    Args:
        url: Address of the page to scrape.
    """
    word_list = []
    source = requests.get(url).text
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(source, "html.parser")
    for post_text in soup.find_all('a', {'class': 'index_singleListingTitles'}):
        content = post_text.string
        # `.string` is None when the tag does not wrap exactly one text
        # node (e.g. nested markup); there is nothing to split then.
        if content is None:
            continue
        # Lower-case so "Word" and "word" are counted together, then split
        # on whitespace into individual words.
        word_list.extend(content.lower().split())
    # Process the words of all titles together, once, after the loop.
    clean_up_list(word_list)
def clean_up_list(list):
    """Strip punctuation symbols from each word and count what remains.

    Every character found in ``symbols`` is removed from each word. Words
    that become empty after stripping are dropped. The cleaned words are
    passed to ``createdictionary``.

    Args:
        list: Iterable of word strings. The name shadows the builtin; it is
            kept unchanged so existing callers are not affected.
    """
    # Literal characters to delete (plain characters, not a regex).
    symbols = "!@#$%^&*()[]_+?><{}:\""
    # A translation table removes all symbols in one pass per word instead
    # of one `.replace()` call per symbol.
    strip_symbols = str.maketrans("", "", symbols)
    clean_word_list = []
    for word in list:
        stripped = word.translate(strip_symbols)
        if stripped:
            clean_word_list.append(stripped)
    createdictionary(clean_word_list)
def createdictionary(clean_word_list):
    """Print each distinct word together with its number of occurrences.

    One line is printed per distinct word, formatted as ``word count``, in
    ascending order of count. ``sorted`` is stable, so words with equal
    counts keep the order in which the counter yields them (first-seen
    order on Python 3.7+, where dicts preserve insertion order).

    Args:
        clean_word_list: Iterable of word strings to tally.
    """
    count = Counter(clean_word_list)
    # Sort the (word, count) pairs by the count, i.e. item index 1.
    for key, value in sorted(count.items(), key=operator.itemgetter(1)):
        print(key, value)
if __name__ == "__main__":
    # No `url` name exists at module level, so the URL is taken from the
    # command line. The guard keeps an import of this module from
    # triggering a network request.
    if len(sys.argv) != 2:
        sys.exit("usage: {} URL".format(sys.argv[0]))
    start(sys.argv[1])