python-pymysql 操作数据库
创建数据库-表
#导入pymysql
import pymysql
# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
# DBNAME = 'test'
# pymysql的接口获取链接
def mysql_conn(host, user, password):
# 传参版本
try:
conn = pymysql.connect(host=host, user=user, password=password)
print('数据库连接成功!')
except pymysql.Error as e:
print("数据库连接失败:" + str(e))
return conn
# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS)
cursor=conn.cursor() #设置游标
# 创建数据库的sql(如果数据库存在就不创建,防止异常)
sql = "CREATE DATABASE IF NOT EXISTS test"
# 执行创建数据库的sql
cursor.execute(sql)
cursor.execute("use test")
# 创建表
sql_2 = '''CREATE TABLE `express2` (
`运单号` bigint NOT NULL AUTO_INCREMENT,
`物流单号` VARCHAR(80),
`物流宝单号` VARCHAR(80),
`行业` VARCHAR(80),
`货主` VARCHAR(80),
`是否保价` VARCHAR(40),
`是否催派` VARCHAR(40),
`是否工单发起` VARCHAR(40),
`是否预售下沉` VARCHAR(40),
`停滞状态` VARCHAR(80),
`停滞时长` INT,
`停滞要求时长` INT,
`是否超停滞要求时长` VARCHAR(80),
`物流停滞节点` VARCHAR(80),
`发货大区` VARCHAR(80),
`发货省` VARCHAR(80),
`仓code` VARCHAR(80),
`仓库` VARCHAR(80),
`配送公司` VARCHAR(150),
`配送cp` VARCHAR(150),
`配送类型` VARCHAR(80),
`配送大区` VARCHAR(80),
`收货大区` VARCHAR(80),
`收货省` VARCHAR(80),
`收货市` VARCHAR(80),
`收货区县` VARCHAR(80),
`区域类型` VARCHAR(80),
`一级分拨` VARCHAR(80),
`二级分拨` VARCHAR(80),
`末分拨` VARCHAR(80),
`网点id` VARCHAR(80),
`网点` VARCHAR(80),
`支付时间` VARCHAR(80),
`创建时间` VARCHAR(80),
`接单时间` VARCHAR(80),
`出库时间` VARCHAR(80),
`入交接区时间` VARCHAR(80),
`揽收时间` VARCHAR(80),
`一级分拨入时间` VARCHAR(80),
`一级分拨出时间` VARCHAR(80),
`二级分拨入时间` VARCHAR(80),
`二级分拨出时间` VARCHAR(80),
`末分拨入时间` VARCHAR(80),
`末分拨出时间` VARCHAR(80),
`网点入时间` VARCHAR(80),
`领件时间` VARCHAR(80),
`配送成功时间` VARCHAR(80),
`配送应签时间` VARCHAR(80),
PRIMARY KEY (`运单号`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''
cursor.execute(sql_2)
#关闭连接和游标
conn.close()
cursor.close()
写入数据库
#导入类
import numpy as np
import pandas as pd
import pymysql
info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx')
info.shape
'''
(443378, 48)
'''
# 将 NAN 转化为 None
info = info.astype(object).where(pd.notnull(info), None)
# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
DBNAME = 'test'
# pymysql的接口获取链接
def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
# 传参版本
try:
conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset)
print('数据库连接成功!')
except pymysql.Error as e:
print("数据库连接失败:" + str(e))
return conn
def write_database():
# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
cursor=conn.cursor() #设置游标
# # 开启事务
conn.begin()
# 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3))
columns = ','.join(list(info.columns))
# 构造每个列对应的数据,对应于上面的((value1, value2, value3))
data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据
# 计算一行有多少value值需要用字符串占位
s_count = len(data_list[0]) * "%s,"
# 构造sql语句
insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")"
try:
res = cursor.executemany(insert_sql, data_list)
print('执行sql受影响的行数:',res)
conn.commit()
cursor.close()
conn.close()
except Exception as e:
# 万一失败了,要进行回滚操作
conn.rollback()
cursor.close()
conn.close()
start_time = time.time()
write_database()
stop_time = time.time()
print('run time is %s' % (stop_time - start_time))
'''
数据库连接成功!
执行sql受影响的行数: 443378
run time is 96.30252575874329
'''
多线程写入数据库
import numpy as np
import pandas as pd
import time, requests
import pymysql
from concurrent.futures import ProcessPoolExecutor
info = pd.read_excel(r'/Users/linxianli/Desktop/数据.xlsx')
# 将 NAN 转化为 None
info = info.astype(object).where(pd.notnull(info), None)
# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
DBNAME = 'test'
# pymysql的接口获取链接
def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
# 传参版本
try:
conn = pymysql.connect(host=host, user=user, password=password, database=db, port=port, charset=charset)
print('数据库连接成功!')
except pymysql.Error as e:
print("数据库连接失败:" + str(e))
return conn
def data_handler(info):
conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
cursor=conn.cursor() #设置游标
# # 开启事务
conn.begin()
# 构造符合sql语句的列,因为sql语句是带有逗号分隔的,(这个对应上面的sql语句的(column1, column2, column3))
columns = ','.join(list(info.columns))
# 构造每个列对应的数据,对应于上面的((value1, value2, value3))
data_list = [tuple(i) for i in info.values] # 每个元组都是一条数据,根据df行数生成多少元组数据
# 计算一行有多少value值需要用字符串占位
s_count = len(data_list[0]) * "%s,"
# 构造sql语句
insert_sql = "insert into " + "express2" + " (" + columns + ") values (" + s_count[:-1] + ")"
try:
res = cursor.executemany(insert_sql, data_list)
print('执行sql受影响的行数:',res)
conn.commit()
cursor.close()
conn.close()
except Exception as e:
# 万一失败了,要进行回滚操作
conn.rollback()
cursor.close()
conn.close()
def run():
urls = [info[0:100000],info[100000:200000],info[200000:300000],info[300000:450000]]
with ProcessPoolExecutor() as excute:
excute.map(data_handler,urls) ##ProcessPoolExecutor 提供的map函数,可以直接接受可迭代的参数,并且结果可以直接for循环取出
start_time = time.time()
run()
stop_time = time.time()
print('run time is %s' % (stop_time - start_time))
'''
数据库连接成功!
数据库连接成功!
数据库连接成功!
数据库连接成功!
执行sql受影响的行数: 100000
执行sql受影响的行数: 100000
执行sql受影响的行数: 100000
执行sql受影响的行数: 143378
run time is 57.68310880661011
'''
查询数据库表数据-清空表数据
# 导入类
import numpy as np
import pandas as pd
import pymysql
# 创建连接
DBHOST = 'localhost'
DBUSER = 'root'
DBPASS = 'lxl123456'
DBNAME = 'test'
# pymysql的接口获取链接
def mysql_conn(host, user, password, db, port=3306, charset="utf8"):
# 传参版本
try:
conn = pymysql.connect(host=host, user=user, password=password, database=db,port=port, charset=charset)
print('数据库连接成功!')
except pymysql.Error as e:
print("数据库连接失败:" + str(e))
return conn
# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
cursor=conn.cursor() #设置游标
def mysql(sql):
cursor.execute(sql) #执行查询语句
jieguo=cursor.fetchall() #查看全部查询结果
cols=cursor.description #类似 desc table_name返回结果
col=[] #创建一个空列表以存放列名
for v in cols:
col.append(v[0]) #循环提取列名,并添加到col空列表
dfsql=pd.DataFrame(jieguo,columns=col) #将查询结果转换成DF结构,并给列重新赋值
# if dfsql.empty:
# return 'empty set' #判断查询结果为空时返回的值
# else:
return dfsql #以DF结构返回查询结构,DF.to_excel...导出查询结果时可以带列名,这样就解决了mysql直接导出结果无列名的问题
df = mysql('''
select * from express2
''')
conn.close()
cursor.close()
df.shape
'''
(443378, 48)
'''
# 清空表数据
# 先创建cursor负责操作conn接口
conn = mysql_conn(DBHOST, DBUSER, DBPASS, DBNAME)
cursor=conn.cursor() #设置游标
sql = '''TRUNCATE TABLE `express2`;'''
cursor.execute(sql)
print('清空表数据成功!')
conn.close()
cursor.close()
'''
数据库连接成功!
清空表数据成功!
'''
代码还有很多地方可以优化的地方可以慢慢调整
请你一定不要停下来 成为你想成为的人
感谢您的阅读,我是LXL