# BasicPythonProject/main.py at master · MorrisLYL/BasicPythonProject · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

#-------------------------------------------
# Import the required modules
#-------------------------------------------
from selenium import webdriver
from html.parser import HTMLParser


#-------------------------------------------
# Define an HTML parser class
#-------------------------------------------
class MyHTMLParser(HTMLParser):
    """Collect the text nodes that lie inside marked sections of a page.

    Capturing is switched on when a text node, after stripping whitespace,
    is exactly one of the start markers ('驚奇4超人' or '劇情介紹'), and is
    switched off when a text node contains one of the stop markers
    ('期待度' or '展開劇情簡介').  A start-marker node is itself captured;
    a stop-marker node is not.
    """

    # Text captured so far (nodes are appended unstripped, with no separator).
    content = ''
    # True while text nodes are being captured.
    print = False

    def handle_data(self, data):
        """Update the capture flag from *data*, then append it if capturing.

        Args:
            data: The raw text node handed over by ``HTMLParser``.
        """
        text = data.strip()

        # A node can never be both a start marker (exact match) and a stop
        # marker (substring match), so one if/elif covers every case.
        if text in ('驚奇4超人', '劇情介紹'):
            self.print = True
        elif '期待度' in text or '展開劇情簡介' in text:
            self.print = False

        if self.print:
            self.content = self.content + data

    def get_content(self):
        """Return all text captured so far as a single string."""
        return self.content

#-------------------------------------------
# Start the Chrome driver
#-------------------------------------------
# NOTE(review): passing the driver path positionally depends on the Selenium
# release in use; newer releases may expect a Service object instead --
# confirm against the installed version before upgrading.
driver = webdriver.Chrome("chromedriver.exe")


#-------------------------------------------
# URLs to visit
#-------------------------------------------
urls = [
    'https://movies.yahoo.com.tw/movieinfo_main.html/id=5644',
]


#-------------------------------------------
# Hand each URL to the browser in turn and
# save the extracted text to out.txt
#-------------------------------------------
try:
    # Open the output file once, outside the loop: reopening it with mode 'w'
    # for every URL would truncate it and keep only the last page's text.
    with open('out.txt', 'w', encoding='utf-8') as outfile:
        for index, url in enumerate(urls):
            driver.get(url)

            # Source of the page as currently held by the browser
            pageSource = driver.page_source

            # Extract the tag-free text of the sections of interest
            parser = MyHTMLParser()
            parser.feed(pageSource)

            content = parser.get_content()
            print(content)

            # Separate pages with a newline; a single URL yields exactly the
            # same file contents as before.
            if index:
                outfile.write('\n')
            outfile.write(content)
finally:
    #-------------------------------------------
    # Shut down the Chrome driver
    #-------------------------------------------
    # quit() ends the whole session, whereas close() only closes the current
    # window; the finally clause makes sure this also runs after an error.
    driver.quit()