-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathScraping_flipkart.py
More file actions
56 lines (40 loc) · 1.78 KB
/
Scraping_flipkart.py
File metadata and controls
56 lines (40 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq
# Flipkart search-results URL for the query "samsung mobiles"; the only page this script reads.
my_url = "https://www.flipkart.com/search?q=samsung+mobiles&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_0_2&otracker1=AS_QueryStore_HistoryAutoSuggest_0_2&as-pos=0&as-type=HISTORY&as-searchtext=sa"

# Download the raw page bytes. The context manager closes the HTTP response
# even when read() raises, so the connection is never left open.
with uReq(my_url) as uClient:
    page_html = uClient.read()

# Build the parse tree with Python's built-in "html.parser" backend.
page_soup = soup(page_html, "html.parser")
# Collect every <div class="_3O0U0u">; each one is treated as a single product
# card by the export loop further down.
# NOTE(review): the class names look machine-generated and presumably change
# when the site is redeployed -- confirm against the live markup.
containers = page_soup.find_all("div", {"class": "_3O0U0u"})

# Stop with an explanatory message instead of an opaque IndexError on
# containers[0] when the selector matches nothing.
if not containers:
    raise SystemExit(
        "No <div class='_3O0U0u'> product cards found; "
        "the page markup or its class names may have changed."
    )

# Sample lookups on the first card, kept so the markup can be inspected
# interactively (e.g. container.div.img["alt"], price[0].text, ratings[0].text).
container = containers[0]
price = container.find_all("div", {"class": "col col-5-12 _2o7WAb"})
ratings = container.find_all("div", {"class": "niH0FQ"})
def _first_div_text(card, css_class):
    """Return the text of the first ``<div class=css_class>`` inside *card*.

    Returns "" when *card* contains no such div, so a card that lacks the
    element (for example one without a rating) does not abort the export.
    """
    matches = card.find_all("div", {"class": css_class})
    return matches[0].text if matches else ""


def _format_price(price_text):
    """Return ``"Rs.<digits>"`` for the first rupee amount in *price_text*.

    Thousands separators are removed, and everything after the amount's
    digits (a further amount, offer wording, ...) is discarded. Returns ""
    when the text holds no "₹" followed by at least one digit.
    """
    _, rupee_sign, tail = price_text.replace(",", "").partition("₹")
    if not rupee_sign:
        return ""
    tail = tail.lstrip()
    # Length of the leading run of ASCII digits: whatever lstrip() removed.
    digit_count = len(tail) - len(tail.lstrip("0123456789"))
    return "Rs." + tail[:digit_count] if digit_count else ""


# Write one CSV row per product card: name, price, rating.
filename = "products.csv"
headers = "Product_Name, Pricing, Ratings \n"

# ``with`` guarantees the file is flushed and closed even if a card fails to
# parse. UTF-8 is requested explicitly because product names may contain
# characters that the platform's default encoding cannot represent.
with open(filename, "w", encoding="utf-8") as f:
    f.write(headers)

    for container in containers:
        # The product name is the alt text of the image in the card's first div.
        first_div = container.div
        image = first_div.img if first_div is not None else None
        product_name = image.get("alt", "") if image is not None else ""

        price_text = _first_div_text(container, "col col-5-12 _2o7WAb").strip()
        final_price = _format_price(price_text)

        # The rating is the first space-separated token of the rating text.
        final_rating = _first_div_text(container, "niH0FQ").split(" ")[0]

        # Commas inside the name would split the column, so they become "|".
        row = ",".join((product_name.replace(",", "|"), final_price, final_rating)) + "\n"
        print(row)
        f.write(row)