-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbaike.py
More file actions
37 lines (30 loc) · 754 Bytes
/
baike.py
File metadata and controls
37 lines (30 loc) · 754 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-
# @Author: liuhao
# @Date: 2017-02-25 10:52:01
# @Last Modified by: liuhao
# @Last Modified time: 2017-02-25 12:43:42
import urllib
from bs4 import BeautifulSoup
# Prefix of every lookup URL; the keyword typed by the user is appended to it.
base_url = "http://baike.baidu.com/item/"
# Body of the most recently downloaded page. getHTML() overwrites it and
# parse() reads it; it starts out as an empty string.
html = ""
def getHTML(url):
    """Download *url* and store the response body in the global ``html``.

    The downloaded body is also echoed to stdout.

    Args:
        url: Address passed to ``urllib.urlopen``.

    Returns:
        None. The body is published through the module-level ``html``
        variable.
    """
    global html
    response = urllib.urlopen(url)
    try:
        body = response.read()
    finally:
        # Always release the underlying connection, even if read() raises;
        # previously the response object was never closed.
        response.close()
    html = body
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement.
    print(html)
def parse():
    """Inspect the page stored in the global ``html`` and print diagnostics.

    The page is parsed with BeautifulSoup's ``html.parser``. When exactly
    one ``div[class=main-content]`` element matches, the type of the result
    of selecting ``div[class=lemma-summary]`` inside it is printed.
    Otherwise the number of matches is printed, followed by ``aaa``.

    Returns:
        None.
    """
    document = BeautifulSoup(html, "html.parser")
    containers = document.select("div[class=main-content]")

    if len(containers) != 1:
        print(len(containers))
        # NOTE(review): indentation was lost in the copy under review; this
        # marker is assumed to belong to the "not exactly one match" branch.
        # Confirm against the original file.
        print("aaa")
        return

    content = containers[0]
    summary_nodes = content.select("div[class=lemma-summary]")
    print(type(summary_nodes))
    # A disabled draft in the original printed summary[0] when exactly one
    # summary node matched; it was commented out and is still not executed.
# Script driver: prompt for a keyword, download its page, then inspect it.
keyword = raw_input("please input keyword that you want to check.")
# The lookup address is the base URL with the keyword appended unchanged.
getHTML("%s%s" % (base_url, keyword))
parse()