ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

用 python 爬取房价信息

2021-12-09 23:02:53  阅读:166  来源: 互联网

标签:plt name get python self 爬取 房价 txt data


       这是我们 Python 课程要求制作的一个项目:用 Python 爬取数据,并对结果做数据展示。

我们使用 requests 库对房价信息进行了爬取,以下就是我们所爬取的网页。

我们主要爬取的内容包括房价走势、上月价格、本月价格、历史最高价格和涨幅等信息,并用 matplotlib 画出折线图,将其保存下来。

import matplotlib.pyplot as plt
import datetime
import requests
import pinyin
import re
import os


def oneyear_m():
    """Plot the one-year price trend stored in ``zoushi.txt``.

    Each line of the file is expected to look like ``YYYY-MM-DD:price``.
    The first five characters of the date part (the year and the dash) are
    dropped so the x axis only shows ``MM-DD``.  The chart is saved as
    ``一年变化图.jpg`` in the current directory and then displayed.

    Raises:
        IndexError: if a line contains no ``:`` separator.
        ValueError: if the price part of a line is not an integer.
    """
    with open("zoushi.txt", 'r', encoding='utf-8') as trend_file:
        records = [row.split(":") for row in trend_file.read().split("\n")]

    dates = [fields[0][5:] for fields in records]
    prices = [int(fields[1]) for fields in records]

    plt.figure(figsize=(28, 10))
    plt.title('一年变化图')  # chart title
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese text
    plt.xlabel('时间')  # x-axis label
    plt.ylabel('价格   (元/㎡)')  # y-axis label
    # Line chart with a small marker on every data point.
    plt.plot(dates, prices, marker='o', markersize=5)
    # Annotate every point with its value, centred just above the marker.
    for date, price in zip(dates, prices):
        plt.text(date, price, price, ha='center', va='bottom', fontsize=10)
    plt.legend(['走势'])  # legend entry for the single series
    plt.savefig('一年变化图.jpg')
    plt.show()


def paint(x, y, flag):
    """Draw, save and show a labelled line chart.

    Args:
        x: Sequence of x-axis labels (one per data point).
        y: Sequence of numeric values, same length as ``x``.
        flag: Chart title; also used as the stem of the output file,
            which is written to ``<flag>.jpg`` in the current directory.
    """
    plt.figure(figsize=(10, 5))
    plt.title(flag)  # chart title
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese text
    # This function is also used for growth rates, which can be negative.
    # Matplotlib draws negative tick labels with U+2212 by default, a glyph
    # that CJK fonts such as SimHei typically lack; fall back to the ASCII
    # hyphen so the minus sign is not rendered as an empty box.
    plt.rcParams['axes.unicode_minus'] = False
    plt.xlabel('时间')  # x-axis label
    plt.ylabel('价格   (元/㎡)')  # y-axis label
    # Line chart with a small marker on every data point.
    plt.plot(x, y, marker='o', markersize=5)
    # Annotate every point with its value, centred just above the marker.
    for a, b in zip(x, y):
        plt.text(a, b, b, ha='center', va='bottom', fontsize=10)
    plt.legend(['方案'])  # legend entry for the single series
    plt.savefig(flag + '.jpg')
    plt.show()


def getdata_txt(txt):
    """Parse per-community price records from an open text file.

    Every non-blank line must contain at least four space-separated fields:
    ``name last_month_price this_month_price historical_max``.  Blank lines
    (including the empty string produced by a trailing newline) are skipped
    instead of raising ``IndexError``.

    Args:
        txt: A readable text file object; it is read to the end but not
            closed by this function.

    Returns:
        A 5-tuple of parallel lists
        ``(name, lastmon, nowmon, history_max, change)`` where ``change`` is
        the signed relative month-over-month change
        ``(this_month - last_month) / last_month`` rounded to 4 decimals.

    Raises:
        IndexError: if a non-blank line has fewer than four fields.
        ValueError: if a price field is not an integer.
        ZeroDivisionError: if a last-month price is ``0``.
    """
    name = []
    lastmon = []
    nowmon = []
    history_max = []
    change = []

    for line in txt.read().split("\n"):
        if not line.strip():
            continue
        fields = line.split(" ")
        front = int(fields[1])
        end = int(fields[2])
        peak = int(fields[3])

        name.append(fields[0])
        lastmon.append(front)
        nowmon.append(end)
        history_max.append(peak)
        # A single signed expression covers both the rising and the falling
        # case: the result is negative exactly when the price dropped.
        change.append(round((end - front) / front, 4))
    return name, lastmon, nowmon, history_max, change


def main():
    """Plot the per-community statistics stored in the two data files.

    Reads ``data_up.txt`` and ``data_down.txt`` from the current directory,
    concatenates their records (rising communities first) and draws four
    charts via ``paint``: last month's price, this month's price, the
    historical maximum and the month-over-month change rate.

    The records of both files are merged; assigning the second file's result
    to the same variables would silently discard everything read from
    ``data_up.txt``.  Each file is closed as soon as it has been parsed.
    """
    name, lastmon, nowmon, history_max, change = [], [], [], [], []
    merged = (name, lastmon, nowmon, history_max, change)

    for path in ("data_up.txt", "data_down.txt"):
        with open(path, "r", encoding='utf-8') as txt:
            columns = getdata_txt(txt)
        # getdata_txt returns five parallel lists in the same order as
        # ``merged``; append each column to its running total.
        for total, column in zip(merged, columns):
            total.extend(column)

    paint(name, lastmon, "上月房价图")
    paint(name, nowmon, "本月房价图")
    paint(name, history_max, "历史最高分布图")
    paint(name, change, "增率变化图")

def get_first(s):
    """Return the pinyin initials of ``s``, one letter per character.

    The result is used as the sub-domain of the scraped URLs, where it
    identifies the city (e.g. the two initials of a two-character name).

    Args:
        s: City name; each character is converted independently.

    Returns:
        A string with the first letter of every character's pinyin, or an
        empty string when ``s`` is empty.
    """
    # '重' is hard-coded to 'c'; presumably the library's default reading of
    # this polyphonic character starts with a different letter than the one
    # the site uses -- verify against the pinyin data if this list grows.
    #
    # format="strip" removes tone marks.  Without it a syllable that starts
    # with a vowel (e.g. "ān") would contribute an accented, non-ASCII
    # letter, which is not usable in a host name.
    return ''.join(
        'c' if ch == '重' else pinyin.get(ch, format='strip')[0]
        for ch in s
    )


def get_really_time(time):
    """Convert a millisecond Unix timestamp to a ``YYYY-MM-DD`` string.

    Args:
        time: Milliseconds since the epoch, as an int or anything ``int()``
            accepts (e.g. a numeric string).

    Returns:
        The calendar date of that instant in the machine's local time zone.
    """
    seconds = int(time) / 1000  # fromtimestamp() expects seconds
    return datetime.datetime.fromtimestamp(seconds).strftime("%Y-%m-%d")


class reptile:
    """Scraper for housing-price data served from ``<city>.fangjia.com``.

    The target city defaults to ``'天津'`` and can be changed with
    ``up_data``.  The ``get_*`` methods download data and store it as text
    files in the current working directory; ``show_all`` plots those files.
    """

    # Seconds to wait for the server.  ``requests`` applies no timeout by
    # default, so without this a stalled connection would block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.__city = '天津'
        self.__header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.43'
        }

    def up_data(self, city):
        """Set the city to scrape; an empty string keeps the current one."""
        if city != '':
            self.__city = city
        else:
            print('没有得到新的城市名。')

    def write_in(self, data, *, fileName='', title='', time=False):
        """Write scraped data to ``fileName`` as UTF-8 text.

        Records are separated by newlines with no trailing newline.

        Args:
            data: With ``time=True`` an iterable of ``(timestamp_ms, value)``
                pairs, written as ``YYYY-MM-DD:value``.  Otherwise a mapping
                of name to an iterable of strings, written as the name and
                every value each followed by a single space.
            fileName: Path of the file to (over)write.
            title: Optional header line written before the records.
            time: Selects the record format, see ``data``.
        """
        # Format everything before opening the file so that a formatting
        # error does not leave a truncated, half-written file behind.
        if time:
            lines = [
                str(get_really_time(stamp)) + ':' + str(value)
                for stamp, value in data
            ]
        else:
            lines = [
                ' '.join([name] + list(values)) + ' '
                for name, values in data.items()
            ]

        with open(fileName, 'w', encoding='utf-8') as fp:
            if title != '':
                fp.write(title + '\n')
            fp.write('\n'.join(lines))

    def show_all(self):
        """Plot the yearly trend and the per-community charts."""
        oneyear_m()
        main()

    def get_photo_data(self):
        """Download the city's one-year price trend into ``zoushi.txt``.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status.
        """
        url = 'http://' + get_first(self.__city) + '.fangjia.com/trend/yearData?'
        param = {
            'defaultCityName': self.__city,
            'districtName': '',
            'region': '',
            'block': '',
            'keyword': ''
        }
        response = requests.get(
            url=url,
            params=param,
            headers=self.__header,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        # Only the first series of the response is used.
        points = response.json()['series'][0]['data']
        self.write_in(points, fileName='zoushi.txt', time=True)

    def get_which(self, choose='up'):
        """Download one ranking table into ``data_<choose>.txt``.

        Args:
            choose: ``'up'`` selects the first ranking table on the page;
                any other value selects the table whose container carries
                the ``border-bottom:none`` style.  The value is also used
                in the output file name.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status.
            ValueError: if the expected table is not present in the page.
        """
        url = 'http://' + get_first(self.__city) + '.fangjia.com/zoushi'

        response = requests.get(
            url=url, headers=self.__header, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        page_txt = response.text

        # Both patterns stop at the table's own closing </tbody> so that
        # only the rows of the selected table are captured.
        if choose == 'up':
            table_pattern = r'<div class="trend trend03">.*?<tbody>(.*?)</tbody>'
        else:
            table_pattern = r'<div class="trend trend03" style="border-bottom:none;">.*?<tbody>(.*?)</tbody>'
        tables = re.findall(table_pattern, page_txt, re.S)
        if not tables:
            raise ValueError(
                'ranking table %r not found in %s' % (choose, url)
            )

        rows = re.findall(r'<tr class=".*?">(.*?)</tr>', tables[0], re.S)
        rows_html = '\n'.join(rows)
        need_name = re.findall(
            r'<td class="td02"><a href=".*?">(.*?)</a></td>', rows_html, re.S
        )
        cells = re.findall(r'<td>(.*?)</td>', rows_html, re.S)
        # Drop the unit and period cells; what remains are the figures.
        need_data = [cell for cell in cells if cell not in ('元/㎡', '周度')]

        # After filtering, each community owns four consecutive cells.
        d = {
            house_name: need_data[4 * idx:4 * (idx + 1)]
            for idx, house_name in enumerate(need_name)
        }
        self.write_in(d, fileName='data_' + choose + '.txt')


if __name__ == '__main__':  # program entry point

    scraper = reptile()
    # Surrounding whitespace would end up in the directory name and in the
    # request parameters, so remove it up front.
    position = input("请输入城市\n").strip()
    if not position:
        # An empty name cannot be used as a directory (os.mkdir('') fails),
        # so stop with a clear message instead of a traceback.
        raise SystemExit('没有得到新的城市名。')
    print(get_first(position))
    # All output files are written into a directory named after the city.
    os.makedirs(position, exist_ok=True)
    os.chdir(position)
    scraper.up_data(position)
    scraper.get_which()
    scraper.get_which('down')
    scraper.get_photo_data()
    scraper.show_all()

标签:plt,name,get,python,self,爬取,房价,txt,data
来源: https://blog.csdn.net/qq_43216483/article/details/121845971

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有