"""Logistic regression on the Breast Cancer Wisconsin (Diagnostic) data set.

The workbook is read with xlrd.  Row 0 of the first sheet is skipped,
column 1 holds the class label (expected to be 1 or 0) and columns 2..31
hold the 30 numeric features.  The first 70% of the rows train the model
with batch gradient descent; the remaining 30% are used to measure accuracy.

NOTE: xlrd 2.0 and later only read legacy ``.xls`` files.  Opening the
``.xlsx`` workbook requires xlrd < 2.0 (or converting the data to ``.xls``).
"""

import math
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import xlrd
import xlsxwriter

# Default workbook location; another path may be given as the first
# command-line argument.
DATA_PATH = r'C:\Users\Aurora\Desktop\data.xlsx'

FIRST_DATA_ROW = 1         # row 0 is skipped (presumably a header row)
LABEL_COLUMN = 1           # class label column
FIRST_FEATURE_COLUMN = 2   # features occupy the next NUM_FEATURES columns
NUM_FEATURES = 30

TRAIN_FRACTION = 0.7       # share of rows used for training
LEARNING_RATE = 0.001      # step size applied to the summed batch gradient
EPOCHS = 80                # number of full passes over the training rows


def sigmoid(x):
    """Return 1 / (1 + exp(-x)) without overflowing for large |x|.

    ``math.exp`` raises OverflowError for large positive arguments, so the
    exponential is always evaluated on a non-positive value.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


def load_dataset(path):
    """Read every data row of the first sheet.

    Returns ``(features, labels)`` where ``features`` is a list of
    NUM_FEATURES-long float lists and ``labels`` is a list of floats.
    """
    sheet = xlrd.open_workbook(path).sheet_by_index(0)
    features = []
    labels = []
    for row in range(FIRST_DATA_ROW, sheet.nrows):
        features.append([
            float(sheet.cell_value(row, FIRST_FEATURE_COLUMN + j))
            for j in range(NUM_FEATURES)
        ])
        labels.append(float(sheet.cell_value(row, LABEL_COLUMN)))
    return features, labels


def fit_scaler(rows):
    """Return per-column ``(means, stds)`` computed from ``rows``."""
    count = len(rows)
    columns = list(zip(*rows))
    means = [sum(column) / count for column in columns]
    stds = []
    for mean, column in zip(means, columns):
        variance = sum((value - mean) ** 2 for value in column) / count
        # A constant column has zero spread; divide by 1 instead of 0.
        stds.append(math.sqrt(variance) or 1.0)
    return means, stds


def apply_scaler(rows, means, stds):
    """Return ``rows`` standardized column-wise with ``means`` and ``stds``."""
    return [
        [(value - mean) / std for value, mean, std in zip(row, means, stds)]
        for row in rows
    ]


def predict(weights, row):
    """Return the model output sigmoid(w . x + b) for one feature row.

    ``weights`` holds one weight per feature followed by the bias as its
    last element; ``zip`` stops at the end of ``row`` so the bias is added
    exactly once.
    """
    z = weights[-1] + sum(w * x for w, x in zip(weights, row))
    return sigmoid(z)


def train(features, labels, weights,
          learning_rate=LEARNING_RATE, epochs=EPOCHS):
    """Fit ``weights`` in place with batch gradient descent and return them.

    For the cross-entropy loss dL/dz = sigmoid(z) - y, hence
    dL/dw_j = (sigmoid(z) - y) * x_j and dL/db = sigmoid(z) - y.  The
    gradient is summed over all training rows before each update.
    """
    for _ in range(epochs):
        gradient = [0.0] * len(weights)
        for row, label in zip(features, labels):
            error = predict(weights, row) - label
            for j, value in enumerate(row):
                gradient[j] += error * value
            gradient[-1] += error
        for j, component in enumerate(gradient):
            weights[j] -= learning_rate * component
    return weights


def accuracy(features, labels, weights):
    """Return the fraction of rows whose output is within 0.5 of the label.

    An output below 0.5 counts as class 0 and above 0.5 as class 1.
    Returns 0.0 when there are no rows to score.
    """
    if not labels:
        return 0.0
    correct = sum(
        1
        for row, label in zip(features, labels)
        if abs(label - predict(weights, row)) < 0.5
    )
    return correct / len(labels)


def main():
    """Load the workbook, train on 70% of the rows and report test accuracy."""
    path = sys.argv[1] if len(sys.argv) > 1 else DATA_PATH
    features, labels = load_dataset(path)
    if not labels:
        raise ValueError('no data rows found in ' + path)

    train_count = round(TRAIN_FRACTION * len(labels))
    train_x, test_x = features[:train_count], features[train_count:]
    train_y, test_y = labels[:train_count], labels[train_count:]

    # Standardize with statistics from the training rows only, so the test
    # rows stay unseen and features of very different magnitude contribute
    # comparably to the gradient.
    means, stds = fit_scaler(train_x)
    train_x = apply_scaler(train_x, means, stds)
    test_x = apply_scaler(test_x, means, stds)

    # Random initial weights; the last element is the bias.
    weights = [random.uniform(-0.3, 0.3) for _ in range(NUM_FEATURES + 1)]
    train(train_x, train_y, weights)

    print('测试样本正确率: ', accuracy(test_x, test_y, weights))


if __name__ == "__main__":
    main()
0000000..844bdcd --- /dev/null +++ b/Exercise1/readme.md @@ -0,0 +1,7 @@ +Exercise1 is about logistic regression on the "Breast Cancer Wisconsin (Diagnostic) Data Set". + +Here are the data and program files. + +Note: 1. 我修改了data.xlsx,M类替换成了1,B类替换成了0,这样方便做梯度下降。 + +2. 对70%数据做训练,30%数据做测试,测试正确率为81%左右。 diff --git a/README.md b/README.md index 07ea68d..627c3d7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ # SES2020spring ## Another day +NOTE:项目实践的代码放在了SES2020spring,不是这个SES2020spring-1。机器学习的Exercise放在这里,因为这个SES2020spring-1是forked的,可以作pull request。