
Neural Network Basics - Real-Valued Circuits, Backpropagation

Notes from studying
https://tensorflow.blog/1%EC%9E%A5-%EC%8B%A4%EC%88%98%EC%B9%98-%ED%9A%8C%EB%A1%9C-%ED%95%B4%EC%BB%A4%EA%B0%80-%EC%95%8C%EB%A0%A4%EC%A3%BC%EB%8A%94-%EB%89%B4%EB%9F%B4-%EB%84%A4%ED%8A%B8%EC%9B%8C%ED%81%AC/
(Chapter 1, "Real-Valued Circuits")

https://github.com/jaejin0me/NeuralNetwork


import sys
import random
from math import exp
class rgate:
    """Forward-only gates used in the first experiments below."""
    @staticmethod
    def mul(x, y):
        return x*y
    @staticmethod
    def add(x, y):
        return x+y
class unit:
    """A wire in the circuit: carries a value forward and accumulates a gradient backward."""
    def __init__(self, value=0.0, grad=0.0):
        self.value = value
        self.grad = grad

u = unit  # short alias used in the gate demos below
class mg:
    """Multiply gate."""
    def __init__(self):
        self.u0 = None
        self.u1 = None
        self.ru = None
    def f(self, u0, u1):
        self.u0 = u0
        self.u1 = u1
        self.ru = unit(u0.value*u1.value, 0.0)
        return self.ru
    def b(self):
        # chain rule: d(u0*u1)/du0 = u1, d(u0*u1)/du1 = u0
        self.u0.grad += self.ru.grad * self.u1.value
        self.u1.grad += self.ru.grad * self.u0.value
class ag:
    """Add gate: the local derivative is 1 for both inputs."""
    def __init__(self):
        self.u0 = None
        self.u1 = None
        self.ru = None
    def f(self, u0, u1):
        self.u0 = u0
        self.u1 = u1
        self.ru = unit(u0.value+u1.value, 0.0)
        return self.ru
    def b(self):
        self.u0.grad += self.ru.grad * 1
        self.u1.grad += self.ru.grad * 1
class sg:
    """Sigmoid gate: s(x) = 1/(1+exp(-x)), with ds/dx = s*(1-s)."""
    def __init__(self):
        self.u0 = None
        self.ru = None
    def sig(self, x):
        return 1/(1+exp(-x))
    def f(self, u0):
        self.u0 = u0
        self.ru = unit(self.sig(u0.value), 0.0)
        return self.ru
    def b(self):
        s = self.sig(self.u0.value)
        self.u0.grad += (s*(1-s)) * self.ru.grad

def fc(a, b, c, x, y):
    """The full single-neuron circuit sigmoid(a*x + b*y + c), for numerical checks."""
    return 1/(1+exp(-(a*x+b*y+c)))
class sqg:
    """Square gate: d(x*x)/dx = 2x."""
    def __init__(self):
        self.u0 = None
        self.ru = None
    def f(self, u0):
        self.u0 = u0
        self.ru = unit(u0.value*u0.value, 0.0)
        return self.ru
    def b(self):
        self.u0.grad += 2*self.u0.value*self.ru.grad
class dg:
    """Reciprocal gate 1/x: d(1/x)/dx = -1/x^2."""
    def __init__(self):
        self.u0 = None
        self.ru = None
    def f(self, u0):
        self.u0 = u0
        self.ru = unit(1.0/u0.value, 0.0)
        return self.ru
    def b(self):
        self.u0.grad += (-1.0/(self.u0.value*self.u0.value)) * self.ru.grad
class mag:
    """Max gate: the gradient flows only to the input that produced the max."""
    def __init__(self):
        self.u0 = None
        self.u1 = None
        self.ru = None
    def f(self, u0, u1):
        self.u0 = u0
        self.u1 = u1
        self.ru = unit(max(u0.value, u1.value), 0.0)
        return self.ru
    def b(self):
        self.u0.grad += self.ru.grad if self.ru.value == self.u0.value else 0.0
        self.u1.grad += self.ru.grad if self.ru.value == self.u1.value else 0.0
class rg:
    """ReLU gate: max(0, x); passes the gradient through only when the output is positive."""
    def __init__(self):
        self.u0 = None
        self.ru = None
    def f(self, u0):
        self.u0 = u0
        self.ru = unit(max(0.0, u0.value), 0.0)
        return self.ru
    def b(self):
        self.u0.grad += self.ru.grad if self.ru.value > 0 else 0.0

def sig(a):
    return 1/(1+exp(-a))
if __name__ == '__main__':
    """
    #1. Random Local Search: tweak the inputs at random and keep the best output seen so far.
    print(rgate.mul(1, 2))
    x = -2
    y = 3
    best_x = x
    best_y = y
    x_try = 0
    y_try = 0
    tweak_amount = 0.01
    out = 0
    best_out = -sys.maxsize-1
    for each in range(100):
        print(each)
        x_try = x + tweak_amount*(random.random()*2-1)
        y_try = y + tweak_amount*(random.random()*2-1)
        out = rgate.mul(x_try, y_try)
        if out > best_out:
            best_out = out
            best_x = x_try
            best_y = y_try
            print(best_x, best_y, best_out)
    """
"""
#Numerical Gradient, 간단한 계산을 통해 기울기를 찾고 더 나은 출력을 도출해낸다
#기울기는 더 나은 출력을 위한 최선의 방향을 의미한다
#단일 뉴런으로 볼때는 큰 스텝이 좋은 출력을 내지만, 복잡하게 꼬여있는 경우 스텝이 크면 예상을 벗어나는 값이 나올 수 있다.
#스텝의 크기는 눈을 가리고 언덕을 오를때의 보폭의 크기로 비유할 수 있다. 작으면 느리지만 확실하게 언덕을 오를 수 있지만, 크면 빠를수 있지만 다칠수 있다.
x =-2
y = 3
out = rgate.mul(x,y)
h = 0.0001
x_derivative = (rgate.mul(x+h,y)-out)/h
y_derivative = (rgate.mul(x,y+h)-out)/h
print(x_derivative)
print(y_derivative)
step_size = 0.01
out = rgate.mul(x,y)
print(out)
x = x + step_size*x_derivative
y = y + step_size*y_derivative
new_out = rgate.mul(x,y)
print(new_out)
"""
"""
#Analytic Gradient
#기울기를 입,출력의 변화로 부터 계산할 경우 입력을 개수에 따라 계산하는 비용이 선형적으로 증가한다.
#수백만 수억개가 있을때는 큰 비용이 들게 된다.
#이 방법을 입출력에 변화를 주어 계산할 필요 없이, 미분공식으로 기울기는 구한다
x =-2
y = 3
out = rgate.mul(x,y)
x_derivative = y #미분 결과에 의해
y_derivative = x #미분 결과에 의해
step_size = 0.01
out = rgate.mul(x,y)
print(out)
x = x + step_size*x_derivative
y = y + step_size*y_derivative
new_out = rgate.mul(x,y)
print(new_out)
"""
"""
뉴럴네트워크 라이브러리를 기울기를 구할때 #3 공식기울기를 사용하지만, 검증은 계산기울기를 통해서 한다.
공식기울기는 효율적이지만 때로는 틀릴수도 있는 반면, 계산기울기는 비용은 크지만 확실한 값이다.
"""
"""
#Backpropagation
#연결된 게이트에서 #3공식기울기를 구할때는 체인룰을 적용한다. 체인룰은 곱셈으로 연결시키는 것이다.
x = -2; y = 5; z =-4
q = rgate.add(x,y)
f = rgate.mul(q,z)
#print(q)
print(f)
d_f_wrt_q = z
d_f_wrt_z = q
d_q_wrt_x = 1.0
d_q_wrt_y = 1.0
d_f_wrt_x = d_q_wrt_x*d_f_wrt_q
d_f_wrt_y = d_q_wrt_y*d_f_wrt_q
g = [d_f_wrt_x,d_f_wrt_y,d_f_wrt_z]
step = 0.01
x=x+step*g[0]
y=y+step*g[1]
z=z+step*g[2]
q = rgate.add(x,y)
f = rgate.mul(q,z)
#print(q)
print(f)
"""
    # Live demo: a single sigmoid neuron s = sigmoid(a*x + b*y + c)
    a = unit(1.0, 0.0)
    b = unit(2.0, 0.0)
    c = unit(-3.0, 0.0)
    x = unit(-1.0, 0.0)
    y = unit(3.0, 0.0)
    mg0 = mg()
    mg1 = mg()
    ag0 = ag()
    ag1 = ag()
    sg0 = sg()
    # forward pass
    m1 = mg0.f(a, x)
    m2 = mg1.f(b, y)
    a1 = ag0.f(m1, m2)
    a2 = ag1.f(a1, c)
    s = sg0.f(a2)
    print(s.value)
    # backward pass: seed the output gradient, then call the gates in reverse order
    sg0.ru.grad = 1.0
    sg0.b()
    ag1.b()
    ag0.b()
    mg1.b()
    mg0.b()
    # nudge every input along its gradient; the output should increase
    step = 0.01
    a.value += step*a.grad
    b.value += step*b.grad
    c.value += step*c.grad
    x.value += step*x.grad
    y.value += step*y.grad
    ax = mg0.f(a, x)
    by = mg1.f(b, y)
    axby = ag0.f(ax, by)
    axbyc = ag1.f(axby, c)
    s = sg0.f(axbyc)
    print(s.value)
    # numerical check of the same neuron gradients (should match a.grad ... y.grad above)
    h = 0.001
    a = 1
    b = 2
    c = -3
    x = -1
    y = 3
    ga = (fc(a+h, b, c, x, y)-fc(a, b, c, x, y))/h
    gb = (fc(a, b+h, c, x, y)-fc(a, b, c, x, y))/h
    gc = (fc(a, b, c+h, x, y)-fc(a, b, c, x, y))/h
    gx = (fc(a, b, c, x+h, y)-fc(a, b, c, x, y))/h
    gy = (fc(a, b, c, x, y+h)-fc(a, b, c, x, y))/h
    print(ga)
    print(gb)
    print(gc)
    print(gx)
    print(gy)
    # * gate
    print()
    print("* gate")
    a = u(11.0, 0.0)
    b = u(22.0, 0.0)
    mg1 = mg()
    r1 = mg1.f(a, b)
    r1.grad = 1.0
    mg1.b()
    da = mg1.u0.grad; print(da)
    db = mg1.u1.grad; print(db)
    print('------')
    # + gate
    print("+ gate")
    a = u(11.0, 0.0)
    b = u(22.0, 0.0)
    ag1 = ag()
    r1 = ag1.f(a, b)
    r1.grad = 1.0
    ag1.b()
    da = ag1.u0.grad; print(da)
    db = ag1.u1.grad; print(db)
    print('------')
    # + gate, 3 vars
    print("+ gate, 3 vars")
    #inputs
    a = u(11.0, 0.0)
    b = u(22.0, 0.0)
    c = u(33.0, 0.0)
    #gates
    ag1 = ag()
    ag2 = ag()
    #forward
    r1 = ag1.f(a, b)
    r2 = ag2.f(r1, c)
    #backward
    r2.grad = 1.0
    ag2.b()
    #r1.grad = ag2.u0.grad
    ag1.b()
    #print derivatives
    dc = ag2.u1.grad; print(dc)
    db = ag1.u1.grad; print(db)
    da = ag1.u0.grad; print(da)
    print('------')
    # mixed gate, a*b+c
    print("mixed gate")
    #inputs
    a = u(1.0, 0.0)
    b = u(2.0, 0.0)
    c = u(3.0, 0.0)
    #gates
    mg1 = mg()
    ag1 = ag()
    #forward
    r1 = mg1.f(a, b)
    r2 = ag1.f(r1, c)
    #backward
    r2.grad = 1.0
    ag1.b()
    #r1.grad = ag1.u0.grad
    mg1.b()
    #print derivatives
    dc = ag1.u1.grad; print(dc)
    db = mg1.u1.grad; print(db)
    da = mg1.u0.grad; print(da)
    print('------')
    # square gate
    print("square gate")
    #input
    a = u(11.0, 0.0)
    #gate
    s1 = sqg()
    #forward
    r1 = s1.f(a)
    #backward
    r1.grad = 1.0
    s1.b()
    #print derivative
    da = s1.u0.grad; print(da)
    print('------')
    # single neuron, sigmoid(a*x+b*y+c)
    print("single neuron, sigmoid(a*x+b*y+c)")
    #inputs
    a = u(1.0, 0.0)
    b = u(2.0, 0.0)
    c = u(3.0, 0.0)
    x = u(4.0, 0.0)
    y = u(5.0, 0.0)
    #gates
    mg1 = mg()
    mg2 = mg()
    ag1 = ag()
    ag2 = ag()
    sg1 = sg()
    #forward
    r1 = mg1.f(a, x)
    r2 = mg2.f(b, y)
    r3 = ag1.f(r1, r2)
    r4 = ag2.f(r3, c)
    r5 = sg1.f(r4)
    #backward, in reverse order of the forward pass
    r5.grad = 1.0
    sg1.b()
    #r4.grad = sg1.u0.grad
    ag2.b()
    #r3.grad = ag2.u0.grad
    ag1.b()
    #r2.grad = ag1.u1.grad
    mg2.b()
    #r1.grad = ag1.u0.grad
    mg1.b()
    #print derivatives
    da = mg1.u0.grad; print(da)
    db = mg2.u0.grad; print(db)
    dc = ag2.u1.grad; print(dc)
    dx = mg1.u1.grad; print(dx)
    dy = mg2.u1.grad; print(dy)
    print('------')
    # (a*b+c)**2
    print("(a*b+c)**2 gate")
    #inputs
    a = u(3.0, 0.0)
    b = u(2.0, 0.0)
    c = u(1.0, 0.0)
    #gates
    mg1 = mg()
    ag1 = ag()
    sq1 = sqg()
    #forward
    r1 = mg1.f(a, b)
    r2 = ag1.f(r1, c)
    r3 = sq1.f(r2)
    #backward
    r3.grad = 1.0
    sq1.b()
    ag1.b()
    mg1.b()
    #print derivatives
    da = mg1.u0.grad; print(da)
    db = mg1.u1.grad; print(db)
    dc = ag1.u1.grad; print(dc)
    print('------')
    # reciprocal (1/x) gate
    print("reciprocal (1/x) gate")
    #input
    a = u(3.0, 0.0)
    #gate
    dg1 = dg()
    #forward
    r1 = dg1.f(a)
    #backward
    r1.grad = 1.0
    dg1.b()
    #print derivative: d(1/a)/da = -1/a**2
    da = dg1.u0.grad; print(da)
    print('------')
    # max gate
    print("max gate")
    #inputs
    a = u(1.0, 0.0)
    b = u(2.0, 0.0)
    #gate
    mag1 = mag()
    #forward
    r1 = mag1.f(a, b)
    #backward
    r1.grad = 1.0
    mag1.b()
    #print derivatives: only b, the larger input, gets the gradient
    da = mag1.u0.grad; print(da)
    db = mag1.u1.grad; print(db)
    print('------')
    # ReLU gate
    print("ReLU gate")
    #input
    a = u(1.0, 0.0)
    #gate
    rg1 = rg()
    #forward
    r1 = rg1.f(a)
    #backward
    r1.grad = 1.0
    rg1.b()
    #print derivative
    da = rg1.u0.grad; print(da)
    print('------')
    # mixed gate, (a+b)/(c+d) = (a+b) * 1/(c+d)
    print("mixed gate, (a+b)/(c+d)")
    #inputs
    a = u(1.0, 0.0)
    b = u(2.0, 0.0)
    c = u(3.0, 0.0)
    d = u(4.0, 0.0)
    #gates
    ag1 = ag()
    ag2 = ag()
    dg1 = dg()
    mg1 = mg()
    #forward
    r1 = ag1.f(a, b)
    r2 = ag2.f(c, d)
    r3 = dg1.f(r2)   #1/(c+d)
    r4 = mg1.f(r1, r3)
    #backward
    r4.grad = 1.0
    mg1.b()
    dg1.b()
    ag1.b()
    ag2.b()
    #print derivatives
    da = ag1.u0.grad; print(da)
    db = ag1.u1.grad; print(db)
    dc = ag2.u0.grad; print(dc)
    dd = ag2.u1.grad; print(dd)
    print('------')
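    # A minimal sketch added beyond the original notes: repeat the
    # forward/backward/update cycle on the single sigmoid neuron and watch the
    # printed output climb a little on every step. Gradients must be reset to
    # zero between steps because b() accumulates with +=.
    print("gradient ascent on the single neuron, 5 steps")
    a = u(1.0, 0.0); b = u(2.0, 0.0); c = u(-3.0, 0.0)
    x = u(-1.0, 0.0); y = u(3.0, 0.0)
    inputs = [a, b, c, x, y]
    step = 0.01
    for i in range(5):
        mg1 = mg(); mg2 = mg(); ag1 = ag(); ag2 = ag(); sg1 = sg()
        s = sg1.f(ag2.f(ag1.f(mg1.f(a, x), mg2.f(b, y)), c))
        s.grad = 1.0
        sg1.b(); ag2.b(); ag1.b(); mg2.b(); mg1.b()
        for p in inputs:
            p.value += step * p.grad
            p.grad = 0.0
        print(s.value)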
