Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trying - but now worked #3

Open
sarathbdm opened this issue Sep 13, 2018 · 3 comments
Open

trying - but now worked #3

sarathbdm opened this issue Sep 13, 2018 · 3 comments

Comments

@sarathbdm
Copy link

print ("#" + str(service_count) + " " + dict_service['Name']
(service_count) += 1

This shows a syntax error here, and I also get a "bs4 module not found" error.

@Dhiren-Biren
Copy link

Dhiren-Biren commented Apr 12, 2019

import csv
import urllib
import urllib.request

import requests
from bs4 import BeautifulSoup
def innerHTML(element):
    """Return the inner markup of *element* as a string.

    Delegates to ``element.decode_contents`` with the ``"html"`` formatter.

    Args:
        element: An object exposing ``decode_contents(formatter=...)``
            (presumably a BeautifulSoup tag -- confirm against callers).

    Returns:
        Whatever ``decode_contents`` returns for the ``"html"`` formatter.
    """
    return element.decode_contents(formatter="html")

def get_name(body):
    """Return the listing name found in *body*.

    The name is the string of the ``<a>`` inside the ``<span class="jcn">``
    element.

    Args:
        body: Parsed listing element supporting ``find(tag, attrs)``.

    Returns:
        The link's string, or ``None`` when the span or its link is missing
        (the caller already skips ``None`` values).
    """
    span = body.find('span', {'class': 'jcn'})
    if span is None or span.a is None:
        return None
    return span.a.string

def get_phone_number(body):
    """Return the phone number text found in *body*.

    Looks up ``<p class="contact-info">`` and reads the string of the
    ``<a>`` nested inside its ``<span>``.

    Args:
        body: Parsed listing element supporting ``find(tag, attrs)``.

    Returns:
        The link's string, or ``''`` when any element along that path is
        missing.
    """
    try:
        return body.find('p', {'class': 'contact-info'}).span.a.string
    except AttributeError:
        # A missing element yields None somewhere in the chain; treat that
        # as "no phone number" rather than an error.
        return ''

def get_rating(body):
    """Return the rating encoded in the star widget of *body*.

    Each child of ``<span class="star_m">`` contributes the number embedded
    in its first CSS class (the class name minus its first character),
    divided by 10. The contributions are summed.

    Args:
        body: Parsed listing element supporting ``find(tag, attrs)``.

    Returns:
        The summed rating as a float; ``0.0`` when the star span is absent.
    """
    stars = body.find('span', {'class': 'star_m'})
    if stars is None:
        return 0.0
    # NOTE(review): assumes every child is a tag with a 'class' attribute
    # whose first entry looks like '<letter><number>' -- confirm against
    # the live markup.
    return sum((float(item['class'][0][1:]) / 10 for item in stars), 0.0)

def get_rating_count(body):
    """Return the digits of the rating-count label found in *body*.

    Reads the string of ``<span class="rt_count">`` and keeps only its
    digit characters.

    Args:
        body: Parsed listing element supporting ``find(tag, attrs)``.

    Returns:
        A string made of the digits in the label (possibly empty), or
        ``None`` when the span or its string is missing.
    """
    span = body.find('span', {'class': 'rt_count'})
    text = span.string if span is not None else None
    if text is None:
        return None

    # Get only digits
    return ''.join(ch for ch in text if ch.isdigit())

def get_address(body):
    """Return the address text found in *body*.

    Reads the text of ``<span class="mrehover">`` with surrounding
    whitespace stripped.

    Args:
        body: Parsed listing element supporting ``find(tag, attrs)``.

    Returns:
        The stripped text, or ``None`` when the span is missing.
    """
    span = body.find('span', {'class': 'mrehover'})
    if span is None:
        return None
    return span.text.strip()
def get_location(body):
    """Return ``"<latitude>, <longitude>"`` taken from the map link in *body*.

    The ``onclick`` attribute of ``<a class="rsmap">`` is split on commas;
    the 4th and 5th pieces, stripped of whitespace and single quotes, are
    joined with ``", "``.

    Args:
        body: Parsed listing element supporting ``find(tag, attrs)``.

    Returns:
        The joined string, or ``None`` when the map link is missing.
    """
    link = body.find('a', {'class': 'rsmap'})
    if link is None:
        return None

    # NOTE(review): assumes positions 3 and 4 of the onclick arguments are
    # latitude and longitude -- confirm against the live markup.
    parts = link['onclick'].split(",")
    latitude = parts[3].strip().replace("'", "")
    longitude = parts[4].strip().replace("'", "")
    return latitude + ", " + longitude
# Scrape the JustDial "Car Repair Services" listings for Hyderabad, one
# result page at a time, and write one CSV row per listing.
MAX_PAGES = 50
page_number = 1
service_count = 1
fields = ['Name', 'Phone_number', 'Rating', 'Rating Count', 'Address', 'Location']

# newline='' is what the csv module expects for files it writes; the
# with-block closes the file even if a request or a parse step fails.
with open('Dhiren Hyderabad_cars.csv', 'w', newline='') as out_file:
    csvwriter = csv.DictWriter(out_file, delimiter=',', fieldnames=fields)

    # No header row is written (the original call was commented out);
    # add csvwriter.writeheader() here to emit one.

    # Stop once the last result page has been processed.
    while page_number <= MAX_PAGES:
        # Request the current page (the page number was previously
        # hard-coded to 1, so the same page was fetched every time).
        url = "https://www.justdial.com/Hyderabad/Car-Repair-Services/nct-10976632/page-%s" % page_number
        req = urllib.request.Request(url, headers={'User-Agent': "Magic Browser"})
        with urllib.request.urlopen(req) as page:
            soup = BeautifulSoup(page.read(), "html.parser")
        services = soup.find_all('li', {'class': 'cntanr'})

        # Iterate through the results on the page.
        for service_html in services:
            # Parse HTML to fetch data.
            dict_service = {}
            name = get_name(service_html)
            phone = get_phone_number(service_html)
            rating = get_rating(service_html)
            count = get_rating_count(service_html)
            address = get_address(service_html)
            location = get_location(service_html)

            # Keys must match `fields`; DictWriter rejects unknown keys and
            # leaves missing ones empty.
            if name is not None:
                dict_service['Name'] = name
            if phone is not None:
                print('getting phone number')
                dict_service['Phone_number'] = phone
            if rating is not None:
                dict_service['Rating'] = rating
            if count is not None:
                dict_service['Rating Count'] = count
            if address is not None:
                dict_service['Address'] = address
            if location is not None:
                dict_service['Location'] = location

            # Write row to CSV.
            csvwriter.writerow(dict_service)

            print("#" + str(service_count) + " " , dict_service)
            service_count += 1

        page_number += 1

@Dhiren-Biren
Copy link

The above code is working.

@krishnamalireddy
Copy link

The above code is working.

Can you please provide only the code, without comments and explanation?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants