https://tensorflow.blog/1%EC%9E%A5-%EC%8B%A4%EC%88%98%EC%B9%98-%ED%9A%8C%EB%A1%9C-%ED%95%B4%EC%BB%A4%EA%B0%80-%EC%95%8C%EB%A0%A4%EC%A3%BC%EB%8A%94-%EB%89%B4%EB%9F%B4-%EB%84%A4%ED%8A%B8%EC%9B%8C%ED%81%AC/
을 공부한 내용
https://github.com/jaejin0me/NeuralNetwork
을 공부한 내용
https://github.com/jaejin0me/NeuralNetwork
| import sys | |
| import random | |
| from math import exp | |
| class rgate: | |
| def mul(x,y): | |
| return x*y | |
| def add(x,y): | |
| return x+y | |
| class unit: | |
| value = 0.0 | |
| grad = 0.0 | |
| def __init__(self, value=0.0,grad=0.0): | |
| self.value = value | |
| self.grad = grad | |
| class u: | |
| value = 0.0 | |
| grad = 0.0 | |
| def __init__(self, value=0.0,grad=0.0): | |
| self.value = value | |
| self.grad = grad | |
| class mg: | |
| u0 = None | |
| u1 = None | |
| ru = None | |
| def __init__(self, u0=None,u1=None): | |
| self.u0 = None | |
| self.u1 = None | |
| self.ru = None | |
| def f(self, u0, u1): | |
| self.u0 = u0 | |
| self.u1 = u1 | |
| self.ru = unit(u0.value*u1.value,0) | |
| return self.ru | |
| def b(self): | |
| self.u0.grad += self.ru.grad * self.u1.value | |
| self.u1.grad += self.ru.grad * self.u0.value | |
| class ag: | |
| u0 = None | |
| u1 = None | |
| ru = None | |
| def f(self,u0, u1): | |
| self.u0 = u0 | |
| self.u1 = u1 | |
| self.ru = unit(u0.value+u1.value,0) | |
| return self.ru | |
| def b(self): | |
| self.u0.grad += self.ru.grad*1 | |
| self.u1.grad += self.ru.grad*1 | |
| class sg: | |
| u0 = None | |
| ru = None | |
| def sig(self, x): | |
| return 1/(1+exp(-x)) | |
| def f(self,u0): | |
| self.u0 = u0 | |
| self.ru = unit(self.sig(self.u0.value), 0) | |
| return self.ru | |
| def b(self): | |
| s = self.sig(self.u0.value) | |
| self.u0.grad += (s*(1-s))*self.ru.grad | |
| def fc(a,b,c,x,y): | |
| return 1/(1+exp(-(a*x+b*y+c))) | |
| class sqg: | |
| u0 = None | |
| ru = None | |
| def f(self,u0): | |
| self.u0 = u0 | |
| self.ru = unit(self.u0.value*self.u0.value,0.0) | |
| return self.ru | |
| def b(self): | |
| self.u0.grad += 2*self.u0.value*self.ru.grad | |
| class dg: | |
| u0 = None | |
| u1 = None | |
| ru = None | |
| def f(self,u0=unit(1.0,0.0),u1=unit(1.0,0.0)): | |
| self.u0 = u0 | |
| self.u1 = u1 | |
| self.ru = unit(self.u0.value/self.u1.value,0.0) | |
| return self.ru | |
| def b(self): | |
| self.u0.grad += (-1/(self.u0.value*self.u0.value)) | |
| class mag:#max gate | |
| u0 = None | |
| u1 = None | |
| ru = None | |
| def f(self,u0,u1): | |
| self.u0 = u0 | |
| self.u1 = u1 | |
| self.ru = unit(self.u0.value > self.u1.value and self.u0.value or self.u1.value , 0.0) | |
| return self.ru | |
| def b(self): | |
| self.u0.grad += self.ru.value == self.u0.value and 1.0*self.ru.grad or 0 | |
| self.u1.grad += self.ru.value == self.u1.value and 1.0*self.ru.grad or 0 | |
| class rg:#ReLU gate | |
| u0 = None | |
| ru = None | |
| def f(self,u0): | |
| self.u0 = u0 | |
| self.ru = unit(self.u0.value > 0 and self.u0.value or 0 , 0.0) | |
| return self.ru | |
| def b(self): | |
| self.u0.grad += self.ru.value > 0 and 1.0*self.ru.grad or 0 | |
| def sig(a): | |
| return 1/(1+exp(-a)) | |
| if __name__=='__main__': | |
| """ | |
| #Random Local Search,출력을 임의로 변화시켜 더 나은 출력을 찾는다 | |
| print(rgate.mul(1,2)) | |
| x = -2 | |
| y = 3 | |
| best_x = x | |
| best_y = y | |
| x_try=0 | |
| y_try=0 | |
| tweak_amout = 0.01 | |
| out = 0; | |
| best_out = -sys.maxsize-1 | |
| for each in range(100): | |
| print(each) | |
| x_try = x + tweak_amout*(random.random()*2-1) | |
| y_try = y + tweak_amout*(random.random()*2-1) | |
| out = rgate.mul(x_try,y_try) | |
| if(out>best_out): | |
| best_out = out | |
| best_x = x_try | |
| best_y = y_try | |
| print(best_x,best_y,best_out) | |
| """ | |
| """ | |
| #Numerical Gradient, 간단한 계산을 통해 기울기를 찾고 더 나은 출력을 도출해낸다 | |
| #기울기는 더 나은 출력을 위한 최선의 방향을 의미한다 | |
| #단일 뉴런으로 볼때는 큰 스텝이 좋은 출력을 내지만, 복잡하게 꼬여있는 경우 스텝이 크면 예상을 벗어나는 값이 나올 수 있다. | |
| #스텝의 크기는 눈을 가리고 언덕을 오를때의 보폭의 크기로 비유할 수 있다. 작으면 느리지만 확실하게 언덕을 오를 수 있지만, 크면 빠를수 있지만 다칠수 있다. | |
| x =-2 | |
| y = 3 | |
| out = rgate.mul(x,y) | |
| h = 0.0001 | |
| x_derivative = (rgate.mul(x+h,y)-out)/h | |
| y_derivative = (rgate.mul(x,y+h)-out)/h | |
| print(x_derivative) | |
| print(y_derivative) | |
| step_size = 0.01 | |
| out = rgate.mul(x,y) | |
| print(out) | |
| x = x + step_size*x_derivative | |
| y = y + step_size*y_derivative | |
| new_out = rgate.mul(x,y) | |
| print(new_out) | |
| """ | |
| """ | |
| #Analytic Gradient | |
| #기울기를 입,출력의 변화로 부터 계산할 경우 입력을 개수에 따라 계산하는 비용이 선형적으로 증가한다. | |
| #수백만 수억개가 있을때는 큰 비용이 들게 된다. | |
| #이 방법을 입출력에 변화를 주어 계산할 필요 없이, 미분공식으로 기울기는 구한다 | |
| x =-2 | |
| y = 3 | |
| out = rgate.mul(x,y) | |
| x_derivative = y #미분 결과에 의해 | |
| y_derivative = x #미분 결과에 의해 | |
| step_size = 0.01 | |
| out = rgate.mul(x,y) | |
| print(out) | |
| x = x + step_size*x_derivative | |
| y = y + step_size*y_derivative | |
| new_out = rgate.mul(x,y) | |
| print(new_out) | |
| """ | |
| """ | |
| 뉴럴네트워크 라이브러리를 기울기를 구할때 #3 공식기울기를 사용하지만, 검증은 계산기울기를 통해서 한다. | |
| 공식기울기는 효율적이지만 때로는 틀릴수도 있는 반면, 계산기울기는 비용은 크지만 확실한 값이다. | |
| """ | |
| """ | |
| #Backpropagation | |
| #연결된 게이트에서 #3공식기울기를 구할때는 체인룰을 적용한다. 체인룰은 곱셈으로 연결시키는 것이다. | |
| x = -2; y = 5; z =-4 | |
| q = rgate.add(x,y) | |
| f = rgate.mul(q,z) | |
| #print(q) | |
| print(f) | |
| d_f_wrt_q = z | |
| d_f_wrt_z = q | |
| d_q_wrt_x = 1.0 | |
| d_q_wrt_y = 1.0 | |
| d_f_wrt_x = d_q_wrt_x*d_f_wrt_q | |
| d_f_wrt_y = d_q_wrt_y*d_f_wrt_q | |
| g = [d_f_wrt_x,d_f_wrt_y,d_f_wrt_z] | |
| step = 0.01 | |
| x=x+step*g[0] | |
| y=y+step*g[1] | |
| z=z+step*g[2] | |
| q = rgate.add(x,y) | |
| f = rgate.mul(q,z) | |
| #print(q) | |
| print(f) | |
| """ | |
| a = unit(1.0, 0.0) | |
| b = unit(2.0, 0.0) | |
| c = unit(-3.0, 0.0) | |
| x = unit(-1.0, 0.0) | |
| y = unit(3.0 ,0.0) | |
| mg0 = mg() | |
| mg1 = mg() | |
| ag0 = ag() | |
| ag1 = ag() | |
| sg0 = sg() | |
| m1 = mg0.f(a, x) | |
| m2 = mg1.f(b, y) | |
| a1 = ag0.f(m1, m2) | |
| a2 = ag1.f(a1, c) | |
| s = sg0.f(a2) | |
| print(s.value) | |
| sg0.ru.grad = 1.0 | |
| sg0.b() | |
| ag0.b() | |
| ag1.b() | |
| mg0.b() | |
| mg1.b() | |
| step = 0.01 | |
| a.value += step*a.grad | |
| b.value += step*b.grad | |
| c.value += step*c.grad | |
| x.value += step*x.grad | |
| y.value += step*y.grad | |
| ax = mg0.f(a, x) | |
| by = mg1.f(b, y) | |
| ag0 = ag0.f(ax, by) | |
| ag1 = ag1.f(ag0, c) | |
| s = sg0.f(ag1) | |
| print(s.value) | |
| h = 0.001 | |
| a = 1 | |
| b = 2 | |
| c = -3 | |
| x = -1 | |
| y = 3 | |
| ga = (fc(a+h,b,c,x,y)-fc(a,b,c,x,y))/h | |
| gb = (fc(a,b+h,c,x,y)-fc(a,b,c,x,y))/h | |
| gc = (fc(a+h,b,c+h,x,y)-fc(a,b,c,x,y))/h | |
| gx = (fc(a+h,b,c,x+h,y)-fc(a,b,c,x,y))/h | |
| gy = (fc(a+h,b,c,x,y+h)-fc(a,b,c,x,y))/h | |
| print(ga) | |
| print(gb) | |
| print(gc) | |
| print(gx) | |
| print(gy) | |
| # * gate | |
| print() | |
| print("* gate") | |
| a = u(11.0,0.0) | |
| b = u(22.0,0.0) | |
| mg1 = mg() | |
| r1 = mg1.f(a,b) | |
| r1.grad = 1.0 | |
| mg1.b() | |
| da = mg1.u0.grad; print(da) | |
| db = mg1.u1.grad; print(db) | |
| print('------') | |
| # + gate | |
| print("+ gate") | |
| a = u(11.0,0.0) | |
| b = u(22.0,0.0) | |
| ag1 = ag() | |
| r1 = ag1.f(a,b) | |
| r1.grad = 1.0 | |
| ag1.b() | |
| da = ag1.u0.grad; print(da) | |
| db = ag1.u1.grad; print(db) | |
| print('------') | |
| # + gate, 3var | |
| print("+ gate, 3th var") | |
| #input def | |
| a = u(11.0,0.0) | |
| b = u(22.0,0.0) | |
| c = u(33.0,0.0) | |
| #gate def | |
| ag1 = ag() | |
| ag2 = ag() | |
| #cal f | |
| r1 = ag1.f(a,b) | |
| r2 = ag2.f(r1,c) | |
| #cal b | |
| r2.grad = 1.0 | |
| ag2.b() | |
| #r1.grad = ag2.u0.grad | |
| ag1.b() | |
| #print derivative | |
| dc = ag2.u0.grad; print(dc) | |
| db = ag1.u1.grad; print(db) | |
| da = ag1.u0.grad; print(da) | |
| print('------') | |
| # mixed gate | |
| print("mixed gate") | |
| #input def | |
| a = u(1.0,0.0) | |
| b = u(2.0,0.0) | |
| c = u(3.0,0.0) | |
| #gate def | |
| mg1 = mg() | |
| ag1 = ag() | |
| #cal f | |
| r1 = mg1.f(a,b) | |
| r2 = ag1.f(r1,c) | |
| #cal b | |
| r2.grad = 1.0 | |
| ag1.b() | |
| #r1.grad = ag1.u0.grad | |
| mg1.b() | |
| #print derivative | |
| dc = ag1.u0.grad; print(dc) | |
| db = mg1.u1.grad; print(db) | |
| da = mg1.u0.grad; print(da) | |
| print('------') | |
| # square gate | |
| print("square gate") | |
| #input def | |
| a = u(11.0,0.0) | |
| #gate def | |
| s1 = sqg() | |
| #cal f | |
| r1 = s1.f(a) | |
| #cal b | |
| r1.grad = 1.0 | |
| s1.b() | |
| #print derivative | |
| da = s1.u0.grad; print(da) | |
| print('------') | |
| # single neuron,ax+by+c | |
| print("single neuron,ax+by+c") | |
| #input def | |
| a = u(1.0,0.0) | |
| b = u(2.0,0.0) | |
| c = u(3.0,0.0) | |
| x = u(4.0,0.0) | |
| y = u(5.0,0.0) | |
| #gate def | |
| mg1 = mg() | |
| mg2 = mg() | |
| ag1 = ag() | |
| ag2 = ag() | |
| sg1 = sg() | |
| #cal f | |
| r1 = mg1.f(a,x) | |
| r2 = mg2.f(b,y) | |
| r3 = ag1.f(r1,r2) | |
| r4 = ag2.f(r3,c) | |
| r5 = sg1.f(r4) | |
| #cal b | |
| r5.grad = 1.0 | |
| sg1.b() | |
| #r4.grad = sg1.u0.grad | |
| ag2.b() | |
| #r3.grad = ag2.u0.grad | |
| ag1.b() | |
| #r2.grad = ag1.u1.grad | |
| mg2.b() | |
| #r1.grad = ag1.u0.grad | |
| mg1.b() | |
| #print derivative | |
| da = mg1.u0.grad; print(da) | |
| db = mg2.u0.grad; print(db) | |
| dc = ag2.u1.grad; print(dc) | |
| dx = mg1.u1.grad; print(dx) | |
| dy = mg2.u1.grad; print(dy) | |
| print('------') | |
| # Math.pow(((a*b+c),2); | |
| print("Math.pow(((a*b+c),2) gate") | |
| #input def | |
| a = u(3.0,0.0) | |
| b = u(2.0,0.0) | |
| c = u(1.0,0.0) | |
| #gate def | |
| mg1 = mg() | |
| ag1 = ag() | |
| sq1 = sqg() | |
| #cal f | |
| r1 = mg1.f(a,b) | |
| r2 = ag1.f(r1,c) | |
| r3 = sq1.f(r2) | |
| #cal b | |
| r3.grad = 1.0 | |
| sq1.b() | |
| ag1.b() | |
| mg1.b() | |
| #print derivative | |
| da = mg1.u0.grad; print(da) | |
| db = mg1.u1.grad; print(db) | |
| dc = ag1.u1.grad; print(dc) | |
| print('------') | |
| # division gate | |
| print("division gate") | |
| #input def | |
| a = u(3.0,0.0) | |
| #gate def | |
| dg1 = dg() | |
| #cal f | |
| r1 = dg1.f(a) | |
| #cal b | |
| r1.grad = 1.0 | |
| dg1.b() | |
| #print derivative | |
| da = dg1.u0.grad; print(da) | |
| print('------') | |
| # max gate | |
| print("max gate") | |
| #input def | |
| a = u(1.0,0.0) | |
| b = u(2.0,0.0) | |
| #gate def | |
| mag1 = mag() | |
| #cal f | |
| r1 = mag1.f(a,b) | |
| #cal b | |
| r1.grad = 1.0 | |
| mag1.b() | |
| #print derivative | |
| da = mag1.u0.grad; print(da) | |
| db = mag1.u1.grad; print(db) | |
| print('------') | |
| # ReLU gate | |
| print("ReLU gate") | |
| #input def | |
| a = u(1.0,0.0) | |
| #gate def | |
| rg1 = rg() | |
| #cal f | |
| r1 = rg1.f(a) | |
| #cal b | |
| r1.grad = 1.0 | |
| rg1.b() | |
| #print derivative | |
| da = rg1.u0.grad; print(da) | |
| print('------') | |
| # mixed gate, (a+b)/(c+d) | |
| print("mixed gate, (a+b)/(c+d)") | |
| #input def | |
| a = u(1.0,0.0) | |
| b = u(2.0,0.0) | |
| c = u(3.0,0.0) | |
| d = u(4.0,0.0) | |
| #gate def | |
| ag1 = ag() | |
| ag2 = ag() | |
| dg1 = dg() | |
| mg1 = mg() | |
| #cal f | |
| r1 = ag1.f(a,b) | |
| r2 = ag2.f(c,d) | |
| r3 = dg1.f(r2) | |
| r4 = mg1.f(r1,r3) | |
| #cal b | |
| r4.grad = 1.0 | |
| mg1.b() | |
| dg1.b() | |
| ag1.b() | |
| ag2.b() | |
| #print derivative | |
| da = ag1.u0.grad; print(da) | |
| db = ag1.u1.grad; print(db) | |
| dc = ag2.u0.grad; print(dc) | |
| dd = ag2.u1.grad; print(dd) | |
| print('------') |
댓글
댓글 쓰기