python-pymysql 操作数据库

创建数据库-表

#导入pymysql
import pymysql


# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
# DBNAME = 'test'

# pymysql的接口获取链接
def mysql_conn(host, user, password):
    # 传参版本
    try:
        conn = pymysql.connect(host=host, user=user, password=password)
        print('数据库连接成功!')
    except pymysql.Error as e:
        print("数据库连接失败:" + str(e))
    return conn

# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS)
cursor=conn.cursor() #设置游标

# 创建数据库的sql(如果数据库存在就不创建,防止异常)
sql = "CREATE DATABASE IF NOT EXISTS test" 
# 执行创建数据库的sql
cursor.execute(sql)
cursor.execute("use test")
# 创建表
sql_2 = '''CREATE TABLE `express2` (
  `运单号` bigint NOT NULL AUTO_INCREMENT,
  `物流单号` VARCHAR(80),
  `物流宝单号` VARCHAR(80),
  `行业` VARCHAR(80),
  `货主` VARCHAR(80),
  `是否保价` VARCHAR(40),
  `是否催派` VARCHAR(40),
  `是否工单发起` VARCHAR(40),
  `是否预售下沉` VARCHAR(40),
  `停滞状态` VARCHAR(80),
  `停滞时长` INT,
  `停滞要求时长` INT,
  `是否超停滞要求时长` VARCHAR(80),
  `物流停滞节点` VARCHAR(80),
  `发货大区` VARCHAR(80),
  `发货省` VARCHAR(80),
  `仓code` VARCHAR(80),
  `仓库` VARCHAR(80),
  `配送公司` VARCHAR(150),
  `配送cp` VARCHAR(150),
  `配送类型` VARCHAR(80),
  `配送大区` VARCHAR(80),
  `收货大区` VARCHAR(80),
  `收货省` VARCHAR(80),
  `收货市` VARCHAR(80),
  `收货区县` VARCHAR(80),
  `区域类型` VARCHAR(80),
  `一级分拨` VARCHAR(80),
  `二级分拨` VARCHAR(80),
  `末分拨` VARCHAR(80),
  `网点id` VARCHAR(80),
  `网点` VARCHAR(80),
  `支付时间` VARCHAR(80),
  `创建时间` VARCHAR(80),
  `接单时间` VARCHAR(80),
  `出库时间` VARCHAR(80),
  `入交接区时间` VARCHAR(80),
  `揽收时间` VARCHAR(80),
  `一级分拨入时间` VARCHAR(80),
  `一级分拨出时间` VARCHAR(80),
  `二级分拨入时间` VARCHAR(80),
  `二级分拨出时间` VARCHAR(80),
  `末分拨入时间` VARCHAR(80),
  `末分拨出时间` VARCHAR(80),
  `网点入时间` VARCHAR(80),
  `领件时间` VARCHAR(80),
  `配送成功时间` VARCHAR(80),
  `配送应签时间` VARCHAR(80),
  PRIMARY KEY (`运单号`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''
cursor.execute(sql_2)

#关闭连接和游标
conn.close()
cursor.close()

 

写入数据库

#导入类
import numpy as np
import pandas as pd
import pymysql

info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx')
info.shape
'''
(443378, 48)
'''

# 将 NAN 转化为 None
info = info.astype(object).where(pd.notnull(info), None)

# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
DBNAME = 'test'

# pymysql的接口获取链接
def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
    # 传参版本
    try:
        conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset)
        print('数据库连接成功!')
    except pymysql.Error as e:
        print("数据库连接失败:" + str(e))
    return conn

def write_database():
    # 先创建cursor负责操作conn接口
    conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
    cursor=conn.cursor() #设置游标

    # # 开启事务
    conn.begin()

    # 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3))
    columns = ','.join(list(info.columns))

    # 构造每个列对应的数据,对应于上面的((value1, value2, value3))
    data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据

    # 计算一行有多少value值需要用字符串占位
    s_count = len(data_list[0]) * "%s,"

    # 构造sql语句
    insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")"

    try:
        res = cursor.executemany(insert_sql, data_list)
        print('执行sql受影响的行数:',res)
        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        # 万一失败了,要进行回滚操作
        conn.rollback()
        cursor.close()
        conn.close()

start_time = time.time()
write_database()
stop_time = time.time()
print('run time is %s' % (stop_time - start_time))
'''
数据库连接成功!
执行sql受影响的行数: 443378
run time is 96.30252575874329
'''

 

多线程写入数据库

import numpy as np
import pandas as pd
import time, requests
import pymysql
from concurrent.futures import ProcessPoolExecutor

info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx')

# 将 NAN 转化为 None
info = info.astype(object).where(pd.notnull(info), None)

# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
DBNAME = 'test'

# pymysql的接口获取链接
def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
    # 传参版本
    try:
        conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset)
        print('数据库连接成功!')
    except pymysql.Error as e:
        print("数据库连接失败:" + str(e))
    return conn

def data_handler(info):
    conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
    cursor=conn.cursor() #设置游标

    # # 开启事务
    conn.begin()

    # 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3))
    columns = ','.join(list(info.columns))

    # 构造每个列对应的数据,对应于上面的((value1, value2, value3))
    data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据

    # 计算一行有多少value值需要用字符串占位
    s_count = len(data_list[0]) * "%s,"

    # 构造sql语句
    insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")"

    try:
        res = cursor.executemany(insert_sql, data_list)
        print('执行sql受影响的行数:',res)
        conn.commit()
        cursor.close()
        conn.close()
    except Exception as e:
        # 万一失败了,要进行回滚操作
        conn.rollback()
        cursor.close()
        conn.close()

def run():
    urls = [info[0:100000],info[100000:200000],info[200000:300000],info[300000:450000]]
    with ProcessPoolExecutor() as excute:
        excute.map(data_handler,urls)  ##ProcessPoolExecutor 提供的map函数,可以直接接受可迭代的参数,并且结果可以直接for循环取出

start_time = time.time()
run()
stop_time = time.time()
print('run time is %s' % (stop_time - start_time))
'''
数据库连接成功!
数据库连接成功!
数据库连接成功!
数据库连接成功!
执行sql受影响的行数: 100000
执行sql受影响的行数: 100000
执行sql受影响的行数: 100000
执行sql受影响的行数: 143378
run time is 57.68310880661011
'''

 

查询数据库表数据-清空表数据

# 导入类
import numpy as np
import pandas as pd
import pymysql

# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
DBNAME = 'test'

# pymysql的接口获取链接
def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
    # 传参版本
    try:
        conn = pymysql.connect(host=host, user=user, password=password, database=db,port=port, charset=charset)
        print('数据库连接成功!')
    except pymysql.Error as e:
        print("数据库连接失败:" + str(e))
    return conn

# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
cursor=conn.cursor() #设置游标

def mysql(sql):
    cursor.execute(sql) #执行查询语句
    jieguo=cursor.fetchall() #查看全部查询结果
    cols=cursor.description #类似 desc table_name返回结果
    col=[] #创建一个空列表以存放列名
    for v in cols:
        col.append(v[0]) #循环提取列名,并添加到col空列表
    dfsql=pd.DataFrame(jieguo,columns=col) #将查询结果转换成DF结构,并给列重新赋值
#     if dfsql.empty:
#         return 'empty set' #判断查询结果为空时返回的值
#     else:   
    return dfsql #以DF结构返回查询结构,DF.to_excel...导出查询结果时可以带列名,这样就解决了mysql直接导出结果无列名的问题

df = mysql('''
    select * from express2
''')

conn.close()
cursor.close()

df.shape
'''
(443378, 48)
'''

# 清空表数据
# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
cursor=conn.cursor() #设置游标

sql = '''TRUNCATE TABLE `express2`;'''
cursor.execute(sql)
print('清空表数据成功!')

conn.close()
cursor.close()
'''
数据库连接成功!
清空表数据成功!
'''

 

 

代码还有很多地方可以优化的地方可以慢慢调整

 

 

请你一定不要停下来 成为你想成为的人

感谢您的阅读,我是LXL