赶集网(Python)
代码:
import requests
import re
import xlwt
# URL template for one listing page; "{}" is filled in with the page number
# via base_url.format(...).
base_url = "https://xa.58.com/zufang/pn{}"

# HTTP headers sent with every request. The User-Agent is a desktop browser
# string; it is split across adjacent literals only to keep lines short.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
    ),
}
def get_data(url, timeout=10):
    """Download one page and pass its HTML to ``parse_data``.

    Args:
        url: Full URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole crawl.

    Returns:
        None. Progress and failures are reported on stdout; request errors
        (including timeouts) are caught here and not re-raised.
    """
    # Keep the try body limited to the network call so that only request
    # failures are reported as request errors.
    try:
        resp = requests.get(url, headers=header, timeout=timeout)
    except requests.RequestException as e:
        print(f"请求错误: {e}")
        return

    # Any status other than 200 is treated as a failed request.
    if resp.status_code != 200:
        print("请求失败!")
        return

    print("请求成功!")
    parse_data(resp.text)
# Matches one "house-cell" <li> and captures three fragments from it:
#   1. the inner content of the first <a ...> that follows,
#   2. the text after `room">` up to the closing </p>,
#   3. the text after `class="strongbox">` up to the closing </b>.
# Closing tags tolerate optional whitespace (`</a>` as well as `</ a>`), so
# the pattern matches normally written markup instead of only the spaced form.
# NOTE(review): the three captures are presumably title, room layout and
# price -- confirm against the live page markup.
_HOUSE_PATTERN = re.compile(
    r'<li.+?house-cell.+?<a[^>]*?>(.*?)</\s*a\s*>'
    r'.+?room">(.*?)</\s*p\s*>'
    r'.+?class="strongbox">(.*?)</b>',
    re.DOTALL,
)

# Rows extracted from every page parsed so far in this process. save_data
# always rewrites the same file from row 0, so handing it only the current
# page would discard all previously parsed pages.
_collected_houses = []


def parse_data(data):
    """Extract listing rows from a page of HTML and save all rows so far.

    Args:
        data: HTML text of one listing page.

    Each match yields a three-item list of whitespace-stripped strings. The
    rows of the current page are printed, appended to the rows gathered by
    earlier calls, and the accumulated rows are passed to ``save_data`` so
    the output file contains every parsed page rather than only the last one.
    """
    house_list = [
        [field.strip() for field in match]
        for match in _HOUSE_PATTERN.findall(data)
    ]
    print(house_list)

    _collected_houses.extend(house_list)
    # Pass a copy so the accumulator cannot be mutated by the callee.
    save_data(list(_collected_houses))
def save_data(house_list):
    """Write the given rows into a new workbook and save it to disk.

    Args:
        house_list: Sequence of rows; each row is a sequence of cell values.
            Row ``r``, item ``c`` is written to cell ``(r, c)``.

    A fresh workbook with a single sheet is created on every call and saved
    under a fixed file name, replacing any previous file of that name.
    """
    book = xlwt.Workbook()
    table = book.add_sheet("赶集网1")

    # Flatten the nested rows into (row, column, value) triples, preserving
    # row-major order, then write them one by one.
    cells = (
        (row_idx, col_idx, cell)
        for row_idx, record in enumerate(house_list)
        for col_idx, cell in enumerate(record)
    )
    for row_idx, col_idx, cell in cells:
        table.write(row_idx, col_idx, cell)

    # File extension changed to .xls.
    book.save("赶集网1.xls")
if __name__ == '__main__':
    # Fetch listing pages 1 through 5 in order, announcing each one first.
    for page_no in range(1, 6):
        page_url = base_url.format(page_no)
        print(f"开始请求第{page_no}页数据")
        get_data(page_url)