ICode9

精准搜索请尝试: 精确搜索
首页 > 数据库> 文章详细

获取最新citysql 城市sql 国家统计局最新市区分布 thinkphp php 抓取

2020-03-31 11:09:04  阅读:244  来源: 互联网

标签:town city citysql name county 最新 sql id match


    /**
     * Scrape the 2019 "statistical division codes and urban-rural
     * classification codes" tables published by the National Bureau of
     * Statistics (http://www.stats.gov.cn/) and insert one row per
     * lowest-level entry through the `position` model.
     *
     * Pages walked, relative to the 2019 base URL (pp = province digits,
     * cc = city digits, dd = county digits):
     *   {pp}.html                        -> cities of a province
     *   {pp}/{ppcc}.html                 -> counties of a city
     *   {pp}/{cc}/{ppccdd}.html          -> towns of a county
     *   {pp}/{cc}/{dd}/{ppccddttt}.html  -> villages of a town
     *
     * Pages that cannot be fetched are skipped (best effort); error output
     * is disabled for the whole run.
     *
     * @author    cq <just_leaf@foxmail.com>
     * @copyright zmtek 2019-12-26
     *
     * @return void
     */
    public function getcity() {

        header("Content-type: text/html; charset=gb2312");
        # No time limit: the crawl issues one HTTP request per page.
        set_time_limit(0);
        error_reporting(0);

        $url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/';

        # Province codes ([1]) and names ([2]) are hard-coded, laid out like a
        # preg_match_all() result so both share the same index.
        $prov = array(
            array(),
            array(11, 12, 13, 14, 15, 21, 22, 23, 31, 32, 33, 34, 35, 36, 37, 41, 42, 43, 44, 45, 46, 50, 51, 52, 53, 54, 61, 62, 63, 64, 65),
            array('北京市', '天津市', '河北省', '山西省', '内蒙古自治区', '辽宁省', '吉林省', '黑龙江省',
                '上海市', '江苏省', '浙江省', '安徽省', '福建省', '江西省', '山东省', '河南省',
                '湖北省', '湖南省', '广东省', '广西壮族自治区', '海南省', '重庆市', '四川省', '贵州省',
                '云南省', '西藏自治区', '陕西省', '甘肃省', '青海省', '宁夏回族自治区', '新疆维吾尔自治区',
            ),
        );

        # Row whose name cell links to a child page: captures (code, name).
        $link_rule  = '/<a href=\'\d{2}\/(.{1,30}).html\'>(.{1,30})<\/a><\/td><\/tr>/';
        # Row made of plain cells (no child page): captures (code, name).
        $plain_rule = '/<td>(.{1,30})<\/td><td>\d{1,10}<\/td><td>(.{1,30})<\/td><\/tr>/';

        # Scraped names are converted with GBK, a superset of GB2312: every
        # name GB2312 could convert is converted identically, and names using
        # characters outside GB2312 no longer make iconv() fail.
        $page_charset = 'GBK';

        for ($i = 0, $e = count($prov[1]); $i < $e; $i++)
        {
            $province_code = (string) $prov[1][$i];
            // Right-pad every code with zeros to the 12-digit form.
            $provinces_id  = str_pad($province_code, 12, "0", STR_PAD_RIGHT);
            // The name is a literal from this file, not scraped bytes, so it
            // must not be re-encoded.
            // NOTE(review): assumes this source file is saved as UTF-8 - confirm.
            $province_name = $prov[2][$i];

            // Each province has its own page listing its cities; fetch it
            // instead of re-matching a page left over from a previous pass.
            $page = file_get_contents($url . $province_code . '.html');
            if ($page === false) continue;
            preg_match_all($link_rule, $page, $cities);

            for ($a = 0, $b = count($cities[1]); $a < $b; $a++)
            {
                $city_code = $cities[1][$a];
                $city_id   = str_pad($city_code, 12, "0", STR_PAD_RIGHT);
                $city_name = iconv($page_charset, "UTF-8", $cities[2][$a]);
                // Digits 3-4 of the city code name the city's sub-directory.
                $city_dir  = substr($city_code, 2, 2);

                $page = file_get_contents($url . $province_code . '/' . $city_code . '.html');
                if ($page === false) continue;
                preg_match_all($link_rule, $page, $counties);

                for ($c = 0, $d = count($counties[1]); $c < $d; $c++)
                {
                    $county_code = $counties[1][$c];
                    $county_id   = str_pad($county_code, 12, "0", STR_PAD_RIGHT);
                    $county_name = iconv($page_charset, "UTF-8", $counties[2][$c]);
                    // Digits 5-6 of the county code name the county's sub-directory.
                    $county_dir  = substr($county_code, 4, 2);

                    $page = file_get_contents($url . $province_code . '/' . $city_dir . '/' . $county_code . '.html');
                    if ($page === false) continue;
                    preg_match_all($link_rule, $page, $towns);
                    // Only linked rows have a child page worth requesting.
                    $towns_linked = !empty($towns[0]);
                    // Some regions use a different HTML layout (plain cells); retry with the other rule.
                    if (!$towns_linked) preg_match_all($plain_rule, $page, $towns);

                    for ($v = 0, $n = count($towns[1]); $v < $n; $v++)
                    {
                        $town_code = $towns[1][$v];
                        $town_id   = str_pad($town_code, 12, "0", STR_PAD_RIGHT);
                        $town_name = iconv($page_charset, "UTF-8", $towns[2][$v]);

                        $villages = array(array(), array(), array());
                        if ($towns_linked) {
                            // The page is addressed by the current town's code
                            // under the county's own sub-directory.
                            $page = file_get_contents($url . $province_code . '/' . $city_dir . '/' . $county_dir . '/' . $town_code . '.html');
                            if ($page !== false) {
                                preg_match_all($link_rule, $page, $villages);
                                // Some regions use a different HTML layout (plain cells); retry with the other rule.
                                if (!$villages[0]) preg_match_all($plain_rule, $page, $villages);
                            }
                        }

                        $row = array(
                            'province_id'   => $provinces_id,
                            'province_name' => $province_name,
                            'city_id'       => $city_id,
                            'city_name'     => $city_name,
                            'county_id'     => $county_id,
                            'county_name'   => $county_name,
                            'town_id'       => $town_id,
                            'town_name'     => $town_name,
                        );

                        if (!$villages[0]) {
                            // No village level found: store the town itself.
                            M('position') -> add($row);
                            continue;
                        }

                        for ($z = 0, $x = count($villages[1]); $z < $x; $z++) {
                            // Village codes are stored as scraped (no padding).
                            $row['housing_id']   = $villages[1][$z];
                            $row['housing_name'] = iconv($page_charset, "UTF-8", $villages[2][$z]);

                            M('position') -> add($row);
                        }
                    }
                }
            }
        }
    }
-- Storage for the scraped administrative-division hierarchy: one row per
-- lowest-level entry, carrying the codes and names of every level above it.
-- NOTE(review): the PHP code writes through M('position'); the `gd_` prefix is
-- presumably the configured table prefix - confirm.
CREATE TABLE `gd_position` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `province_id` varchar(20) DEFAULT NULL,
  `province_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `city_id` varchar(20) DEFAULT NULL,
  `city_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `county_id` varchar(20) DEFAULT NULL,
  `county_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `town_id` varchar(20) DEFAULT NULL,
  `town_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `housing_id` varchar(20) DEFAULT NULL,
  -- Same character set as the other *_name columns; stays NULL for rows
  -- inserted without a housing (village) level.
  `housing_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL,
  PRIMARY KEY (`id`)
  -- No AUTO_INCREMENT seed: a freshly created table starts numbering at 1.
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

 

标签:town,city,citysql,name,county,最新,sql,id,match
来源: https://www.cnblogs.com/leaf-cq/p/12603680.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有