今日内容

  • 对Python中numpy与pandas两个库做了学习
  • numpy的基本操作
# -*- coding: utf-8 -*-
# @Time: 2021/10/10 10:58
# @Author: 闲卜
# @File: test.py
# @Software: PyCharm

import numpy as np
if __name__ == "__main__":
    # ---- Array creation and basic attributes ----
    small = np.array([[1,  3], [3, 5]])
    print(small)
    print(small.ndim)   # number of dimensions
    print(small.shape)  # length along each dimension
    print(small.size)   # total number of elements

    zeros = np.zeros((3, 4))  # 3x4 matrix filled with 0
    print(zeros)
    ones = np.ones((3, 4))  # 3x4 matrix filled with 1
    print(ones)
    stepped = np.arange(10, 20, 2)  # start at 10, step by 2, stop before 20
    print(stepped)
    grid = np.arange(12).reshape(3, 4)  # the integers 0..11 laid out as 3 rows x 4 columns
    print(grid)
    # linspace(1, 10, 20) produces 20 evenly spaced samples from 1 to 10
    # (both ends included), reshaped here to 4x5. Expected output:
    # [[1.          1.47368421  1.94736842  2.42105263  2.89473684]
    # [3.36842105  3.84210526  4.31578947  4.78947368  5.26315789]
    # [5.73684211 6.21052632 6.68421053 7.15789474 7.63157895]
    # [8.10526316  8.57894737  9.05263158  9.52631579 10.]]
    spaced = np.linspace(1, 10, 20).reshape(4, 5)
    print(spaced)

    # ---- Element-wise operations versus matrix multiplication ----
    sines = np.sin(small)  # sin applied to every element
    print(sines)

    elementwise = small*small  # element-by-element product
    product = np.dot(small, small)  # true matrix product
    product_via_method = small.dot(small)  # same matrix product, method form (not printed)
    print(elementwise)
    print(product)
    print("---------------------------")

    # ---- Reductions and statistics ----
    uniform = np.random.random((2, 4))  # 2x4 matrix of random floats in [0, 1)
    print(uniform)
    table = np.array([[0, 2, 3, 5], [5, 5, 7, 9]])
    print(table)
    print(np.sum(table))  # sum of all elements
    print(np.max(table))  # largest element
    print(np.min(table))  # smallest element
    print(np.sum(table, axis=0))  # sum of each column
    print(np.max(table, axis=1))  # maximum of each row
    print(np.min(table, axis=0))  # minimum of each column
    print(np.argmin(table))  # index of the smallest element in the flattened array (0-based)
    print(np.mean(table))  # arithmetic mean of all elements
    print(np.average(table))  # also the mean here, since no weights are given
    print(table.mean())  # method form of np.mean
    print(np.median(table))  # median of all elements
    print(table)
    print(np.cumsum(table))  # running total over the flattened array
    print(table.cumsum())  # method form of np.cumsum
    print(np.diff(table))  # difference between neighbouring elements within each row
    print(np.nonzero(table))
    # nonzero returns one index array per dimension, e.g.
    # row indices    array([0, 0, 0, 1, 1, 1, 1], dtype=int64),
    # column indices array([1, 2, 3, 0, 1, 2, 3], dtype=int64)
    print(np.transpose(table))  # transpose
    print(table.T)  # shorthand for the transpose
    print(np.clip(table, 5, 9))  # values below 5 become 5, values above 9 become 9

    # ---- Flattening ----
    print(table.flatten())  # copy of the matrix collapsed to one dimension
    for value in table.flat:  # .flat is an iterator over the elements in flattened order
        print(value)

    print("---------------------------------")

    # ---- Joining arrays ----
    first = np.array([1, 2, 3])
    second = np.array([4, 6, 5])
    print(np.vstack((first, second)))  # vertical stack: each input becomes a row
    print(np.hstack((first, second)))  # horizontal stack: inputs joined end to end
    # concatenate joins any number of arrays along an existing axis; these
    # inputs are 1-D, so axis=0 is the only axis available.
    print(np.concatenate((first, second, first), axis=0))

    print("---------------------")

    # ---- Splitting ----
    grid = np.arange(12).reshape(3, 4)
    # Splitting into 1 section along axis 0 returns a list holding the whole array.
    print(np.split(grid, 1, axis=0))

  • pandas与numpy的结合
import numpy as np
import pandas as pd

if __name__ == "__main__":
    # A Series is a labelled 1-D array; np.nan marks a missing value.
    a = pd.Series([1, 3, 6, np.nan, 41, 3])
    print(a)
    dates = pd.date_range("20211010", periods=6)  # six consecutive days
    print(dates)
    # `index` labels the rows and `columns` labels the columns (a dict can also
    # be used to describe the data). np.random.rand draws from the uniform
    # distribution on [0, 1); np.random.randn draws from the standard normal
    # distribution (mean 0, variance 1).
    # Column labels are kept unique: a repeated label makes df['d'] / df.d
    # return a DataFrame instead of a single column.
    df = pd.DataFrame(np.random.randn(6, 6), index=dates,
                      columns=['a', 'b', 'c', 'd', 'e', 'f'])

    print(df)
    print(df.dtypes)
    print(df.describe())
    print(df.sort_index(axis=1, ascending=False))  # columns ordered by label, descending
    print(df.sort_values(by='a', ascending=False))  # rows ordered by column a, largest first
    df.iloc[3, 1] = 9  # assign by integer position (row 3, column 1)
    df.loc['20211013', 'a'] = 9  # assign by label (row 2021-10-13, column a)
    df[df.c > 0] = 0  # every row whose c value is positive is set to 0 in all columns
    df['F'] = np.nan  # add a new column made entirely of NaN
    print(df)
    print(df.dropna(axis=1, how='any'))  # drop every column that contains any NaN
    print(df.fillna(0))  # replace NaN with numeric 0 (a string '0' would force object dtype)
    print(df.isnull())  # boolean mask of the missing positions

    # Three 3x4 frames filled with 0, 1 and 2, stacked vertically.
    df1 = pd.DataFrame(np.ones((3, 4))*0, columns=['a', 'b', 'c', 'd'])
    df2 = pd.DataFrame(np.ones((3, 4))*1, columns=['a', 'b', 'c', 'd'])
    df3 = pd.DataFrame(np.ones((3, 4))*2, columns=['a', 'b', 'c', 'd'])
    print(df1)
    print(df2)
    print(df3)
    # axis=0 appends rows; ignore_index=True renumbers the result 0..8
    # instead of repeating each frame's own 0..2 index.
    res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
    print(res)

  • 可视化matplotlib的基本使用
  # -*- coding: utf-8 -*-
  # @Time: 2021/10/10 20:02
  # @Author: 闲卜
  # @File: test3.py
  # @Software: PyCharm
  import numpy as np
  import pandas as pd
  import matplotlib.pyplot as plt
  if __name__ == '__main__':
      # Series: 1-D data. Build 1000 random samples and take their running sum.
      # Kept under its own name so it is not silently overwritten by the frame below.
      series = pd.Series(np.random.randn(1000), index=np.arange(1000))
      series = series.cumsum()

      # DataFrame: 2-D data with 1000 rows and the four columns A, B, C, D.
      data = pd.DataFrame(np.random.randn(1000, 4),
                          index=np.arange(1000),
                          columns=list("ABCD"))

      data = data.cumsum()  # running sum down each column
      # data.plot() would draw every column as a line; scatter plots are used instead.
      ax = data.plot.scatter(x="A", y="B", color="Darkblue", label="class1")
      # `ax` is the Axes returned by the first plot; passing ax=ax draws the
      # second scatter on that same Axes. The return value is not assigned
      # back to `data`, so `data` remains the DataFrame.
      data.plot.scatter(x="A", y="D", label="class2", ax=ax)
      plt.show()
      print(data)
  
  • tkinter中的画线方法

    # -*- coding: utf-8 -*-
    # @Time: 2021/10/10 21:45
    # @Author: 闲卜
    # @File: cas.py
    # @Software: PyCharm
    import tkinter as tk
    if __name__ == "__main__":
        root = tk.Tk()
        canvas = tk.Canvas(root, bg='blue', height=100, width=200)

        # Corners of the shared bounding box: (left, top) to (right, bottom).
        left, top, right, bottom = 50, 50, 80, 80

        canvas.create_line(left, top, right, bottom)  # line from (50, 50) to (80, 80)
        oval_id = canvas.create_oval(left, top, right, bottom, fill='red')  # ellipse inside the box
        # Arc inside a box shifted by 30 px, starting at 0 degrees and spanning 70 degrees.
        arc_id = canvas.create_arc(left + 30, top + 30, right + 30, bottom + 30,
                                   start=0, extent=70)
        rect_id = canvas.create_rectangle(left, top, right, bottom)  # rectangle on the same box

        # Each click on the button shifts the rectangle by (0, 2).
        move_button = tk.Button(root, text='move',
                                command=lambda: canvas.move(rect_id, 0, 2))
        move_button.pack()
        canvas.pack()
        tk.mainloop()
    
    

明日计划

  • 完成Q-learning算法的案例实现