Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions Exercise1/Exercise1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import matplotlib.pyplot as plt
import numpy as np
import xlrd
import xlsxwriter
import math
import random

# Open the workbook that holds the dataset and use its first worksheet.
# NOTE(review): xlrd 2.x no longer reads .xlsx files - confirm the installed
# xlrd version (or the file format) before relying on this call.
ExcelFile = xlrd.open_workbook(r'C:\Users\Aurora\Desktop\data.xlsx')
data = ExcelFile.sheet_by_index(0)


size = 569  # number of samples the script expects in the sheet
X = [[0 for _ in range(30)] for _ in range(size)]  # feature matrix: one row of 30 features per sample
W = [random.uniform(-0.3, 0.3) for _ in range(31)]  # randomly initialised weights; W[30] is used as the bias
Y = [0 for _ in range(size)]  # model output per sample
dL = [0 for _ in range(size)]  # dL/dz per sample
alpha = 0.001  # learning rate


# Initialise the input X for EVERY sample, not only the first 70%: the
# evaluation step indexes X for the remaining rows as well, and they would
# otherwise stay all-zero.  Sample i-1 comes from sheet row i (row 0 is
# skipped, presumably a header) and its 30 features from columns 2..31.
# The upper bound is capped by the sheet's real row count so a shorter sheet
# cannot raise an IndexError.
for i in range(1, min(size, data.nrows - 1) + 1):
    for j in range(2, 32):
        X[i - 1][j - 2] = data.cell_value(i, j)

def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x) of *x*.

    The two branches are algebraically identical; each one only ever
    exponentiates a non-positive number, so math.exp cannot raise
    OverflowError no matter how large |x| is.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, exp(-x) could overflow; use the equivalent e**x / (1 + e**x).
    e = math.exp(x)
    return e / (1 + e)

def _predict(features):
    """Return the model output sigmoid(w . x + b) for one sample.

    *features* holds the 30 feature values of the sample; the weights are
    W[0..29] and the bias is W[30].
    """
    z = W[30]  # add the bias exactly once, not once per feature
    for j in range(30):
        z += W[j] * features[j]
    try:
        return sigmoid(z)
    except OverflowError:
        # exp(-z) can only overflow for a very negative z, where sigmoid(z) -> 0.
        return 0.0


n = 80  # number of training passes
train_count = round(0.7 * size) - 1  # samples 0 .. train_count-1 train the model (about 70%)
# The labels (sheet column 1) do not change between passes, so read them once.
train_labels = [data.cell_value(i + 1, 1) for i in range(train_count)]

for _ in range(n):
    grad = [0.0] * 31  # accumulated dL/dW for this pass; slot 30 belongs to the bias

    for i in range(train_count):
        Y[i] = _predict(X[i])
        # dL/dz = sigmoid(z) - y.  This is the same quantity as
        # -y/(1+e**z) + (1-y)/(1+e**-z), written so that it cannot overflow.
        dL[i] = Y[i] - train_labels[i]
        for j in range(30):
            grad[j] += dL[i] * X[i][j]  # dL/dW[j] = dL/dz * x[j]
        grad[30] += dL[i]  # dL/db = dL/dz

    # Batch gradient-descent step with the mean gradient; every weight gets
    # its own gradient component and the bias is updated as well.
    # NOTE(review): the features are used unscaled - standardising them would
    # likely make this step size behave better; confirm on the real data.
    for j in range(31):
        W[j] -= alpha / train_count * grad[j]

# Evaluate on every remaining sample (about 30%), capped by the sheet's real
# row count so a shorter sheet cannot raise an IndexError.
test_end = min(size, data.nrows - 1)
correctNum = 0
test_count = 0
for k in range(train_count, test_end):
    y = data.cell_value(k + 1, 1)
    # Read the features straight from the sheet so the evaluation does not
    # depend on which rows were pre-loaded into X.
    features = [data.cell_value(k + 1, j) for j in range(2, 32)]
    Y[k] = _predict(features)

    # An output below 0.5 counts as class 0 and above 0.5 as class 1, so the
    # prediction is correct when it lies within 0.5 of the label.
    if abs(y - Y[k]) < 0.5:
        correctNum += 1
    test_count += 1

# Divide by the number of samples actually evaluated.
accuracy = correctNum / test_count if test_count else 0.0
print('测试样本正确率: ', accuracy)



Binary file added Exercise1/Result.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Exercise1/data.xlsx
Binary file not shown.
7 changes: 7 additions & 0 deletions Exercise1/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Exercise1 is about logistic regression on the "Breast Cancer Wisconsin (Diagnostic) Data Set".

Here are the data and program files.

Note: 1.我修改了data.xlsx,M类替换成了1,B类替换成了0,这样方便做梯度下降。

2.对70%数据做训练,30%数据做测试,测试正确率为81%左右
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# SES2020spring

## Another day
NOTE:项目实践的代码放在了SES2020spring,而不是这个SES2020spring-1。机器学习的Exercise放在这里,因为SES2020spring-1是forked的仓库,可以提pull request。