# 深度学习在金融领域的应用

1）序列数据建模：代表算法 LSTM

2）图谱建模：代表算法 GCN

3）传统特征衍生：代表算法 CNN、XDeepFM

#### 循环神经网络

B卡

import numpy as np
import torch
from torch import nn

class BLSTM(nn.Module):
    """Bidirectional LSTM classifier over embedded token sequences.

    The sentence representation is the concatenation of the forward
    direction's output at the last valid time step and the backward
    direction's output at the first time step; a linear layer followed by
    a softmax turns it into class probabilities.
    """

    def __init__(self, embeddings, input_dim, hidden_dim, num_layers, output_dim, max_len=40, dropout=0.5):
        """Build the embedding, the bidirectional LSTM and the output layer.

        :param embeddings: 2-D tensor (vocab_size, embedding_dim) used as the
            initial, trainable embedding weight
        :param input_dim: feature size fed to the LSTM; the embedded batch is
            reshaped to (batch, -1, input_dim) before entering the LSTM
        :param hidden_dim: hidden size of each LSTM direction
        :param num_layers: number of stacked LSTM layers
        :param output_dim: number of output classes
        :param max_len: sequence length assumed when fetching the final states
        :param dropout: dropout passed to ``nn.LSTM``
        """
        super(BLSTM, self).__init__()
        self.emb = nn.Embedding(num_embeddings=embeddings.size(0),
                                embedding_dim=embeddings.size(1))
        # Replace the randomly initialised table with the supplied vectors.
        self.emb.weight = nn.Parameter(embeddings)

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # sentence encoder
        self.sen_len = max_len
        self.sen_rnn = nn.LSTM(input_size=input_dim,
                               hidden_size=hidden_dim,
                               num_layers=num_layers,
                               dropout=dropout,
                               batch_first=True,
                               bidirectional=True)

        self.output = nn.Linear(2 * self.hidden_dim, output_dim)

    def bi_fetch(self, rnn_outs, seq_lengths, batch_size, max_len):
        """Gather the final output of each direction for every sequence.

        :param rnn_outs: LSTM outputs, reshapeable to
            (batch_size, max_len, 2, hidden)
        :param seq_lengths: tensor with ``batch_size`` true sequence lengths
        :param batch_size: number of sequences in the batch
        :param max_len: padded length of every sequence
        :return: (batch_size, 2 * hidden) tensor ``[forward_last, backward_first]``
        """
        # Follow the input's device instead of forcing CUDA, so the module
        # also runs on CPU.
        device = rnn_outs.device
        rnn_outs = rnn_outs.reshape(batch_size, max_len, 2, -1)

        # Split the two directions and flatten (batch, time) into one axis.
        fw_out = rnn_outs[:, :, 0, :].reshape(batch_size * max_len, -1)
        bw_out = rnn_outs[:, :, 1, :].reshape(batch_size * max_len, -1)

        # Row offset of each sequence's first time step in the flattened axis.
        batch_range = torch.arange(batch_size, device=device) * max_len
        lengths = seq_lengths.view(batch_size).to(device=device, dtype=torch.long)

        # Forward direction: output at the last valid step (length - 1).
        fw_index = batch_range + lengths - 1
        fw_out = torch.index_select(fw_out, 0, fw_index)  # (batch_size, hid)

        # Backward direction: output at time step 0.
        bw_out = torch.index_select(bw_out, 0, batch_range)

        return torch.cat([fw_out, bw_out], dim=1)

    def forward(self, sen_batch, sen_lengths, sen_mask_matrix):
        """Compute class probabilities for a batch of sentences.

        :param sen_batch: (batch, sen_length) tensor of token indices; the
            sequence length must equal ``max_len`` given at construction
        :param sen_lengths: tensor with the true length of each sentence
        :param sen_mask_matrix: not used by this implementation; kept so
            existing callers keep working
        :return: (batch, output_dim) tensor of softmax probabilities
        """
        # Embedding layer
        sen_batch = self.emb(sen_batch)
        batch_size = len(sen_batch)

        # Bi-LSTM computation
        sen_outs, _ = self.sen_rnn(sen_batch.view(batch_size, -1, self.input_dim))
        sen_rnn = sen_outs.contiguous().view(batch_size, -1, 2 * self.hidden_dim)  # (batch, sen_len, 2*hid)

        # Fetch the truly last hidden output of both directions
        sentence_batch = self.bi_fetch(sen_rnn, sen_lengths, batch_size, self.sen_len)  # (batch_size, 2*hid)

        out = self.output(sentence_batch)
        # Explicit dim: normalise over the class axis of the 2-D logits.
        out_prob = torch.softmax(out.view(batch_size, -1), dim=1)

        return out_prob

#### 卷积神经网络

CNN中的卷积本质上就是利用一个共享参数的过滤器（kernel），通过计算中心像素点以及相邻像素点的加权和来构成feature map实现空间特征的提取，加权系数就是卷积核的权重系数。

Word2Vec的基本流程如下

#### node2vec

node2vec 可以改善random walk，更好地反映同质性与结构相似性

q 越大，p越小，结构相似性所占比重越高

p 越大，q越小，同质性所占比重越高

#### Node2Vec的pytorch实现

#preprocess_transition_probs（初始生成节点到节点的概率）

def preprocess_transition_probs(self):
    """Precompute alias tables that guide the random walks.

    Reads ``self.G`` and ``self.is_directed``. For every node it normalises
    the weights of the edges to its sorted neighbours and stores the alias
    table in ``self.alias_nodes[node]``. For every edge it stores the
    second-order table produced by ``self.get_alias_edge`` in
    ``self.alias_edges``; for an undirected graph the reversed edge is
    stored as well.

    NOTE(review): presumably ``self.G`` is a networkx graph whose edges all
    carry a ``'weight'`` attribute -- confirm against the caller.
    """
    G = self.G
    is_directed = self.is_directed

    alias_nodes = {}
    for node in G.nodes():
        # Weight of the edge to each neighbour, in sorted-neighbour order.
        unnormalized_probs = [G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))]
        # Sum of the weights: the normalising constant (denominator).
        norm_const = sum(unnormalized_probs)
        normalized_probs = [float(u_prob) / norm_const for u_prob in unnormalized_probs]
        # Convert the distribution into alias tables for later sampling.
        alias_nodes[node] = alias_setup(normalized_probs)

    alias_edges = {}
    for edge in G.edges():
        alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
        if not is_directed:
            # An undirected edge can be traversed both ways, so the reverse
            # direction needs its own table.
            alias_edges[(edge[1], edge[0])] = self.get_alias_edge(edge[1], edge[0])

    self.alias_nodes = alias_nodes
    self.alias_edges = alias_edges

    return

#get_alias_edge是得到节点到节点的概率

def get_alias_edge(self, src, dst):
    """Build the alias tables for the second-order step taken from ``dst``.

    :param src: previous node of the walk
    :param dst: current node of the walk
    :return: whatever ``alias_setup`` returns for the normalised
        distribution over the sorted neighbours of ``dst``

    Each neighbour's edge weight is rescaled before normalisation:
    divided by ``self.p`` when the neighbour is ``src`` (stepping back),
    left unchanged when the neighbour also has an edge to ``src``,
    and divided by ``self.q`` otherwise.
    """
    G = self.G
    p = self.p
    q = self.q

    unnormalized_probs = []
    for dst_nbr in sorted(G.neighbors(dst)):
        if dst_nbr == src:
            unnormalized_probs.append(G[dst][dst_nbr]['weight'] / p)
        elif G.has_edge(dst_nbr, src):
            unnormalized_probs.append(G[dst][dst_nbr]['weight'])
        else:
            unnormalized_probs.append(G[dst][dst_nbr]['weight'] / q)
    norm_const = sum(unnormalized_probs)
    normalized_probs = [float(u_prob) / norm_const for u_prob in unnormalized_probs]

    return alias_setup(normalized_probs)

#alias_setup ：输入概率，得到对应的两组数，方便后面的抽样调用

def alias_setup(probs):
    """Build alias-sampling tables for a discrete distribution.

    :param probs: sequence of probabilities, one per outcome
    :return: tuple ``(J, q)`` of arrays of length ``len(probs)``; ``q[k]`` is
        the threshold for keeping outcome ``k`` and ``J[k]`` is the outcome
        returned otherwise. Both are consumed by ``alias_draw``.
    """
    K = len(probs)
    q = np.zeros(K)
    # The builtin ``int`` replaces ``np.int``, which NumPy 1.24 removed.
    J = np.zeros(K, dtype=int)

    # Indices whose scaled probability is below / not below 1.
    smaller = []
    larger = []
    for kk, prob in enumerate(probs):
        q[kk] = K * prob
        if q[kk] < 1.0:
            smaller.append(kk)
        else:
            larger.append(kk)

    # Pair each under-full index with an over-full one; the over-full index
    # donates the missing mass and is re-filed by what remains.
    while len(smaller) > 0 and len(larger) > 0:
        small = smaller.pop()
        large = larger.pop()

        J[small] = large
        q[large] = q[large] + q[small] - 1.0
        if q[large] < 1.0:
            smaller.append(large)
        else:
            larger.append(large)

    return J, q

#alias_draw 抽样函数

def alias_draw(J, q):
    """Draw one sample from a non-uniform discrete distribution.

    :param J: alias table (index returned when the threshold test fails)
    :param q: threshold table of the same length as ``J``
    :return: the sampled index, either the picked slot or its alias ``J[kk]``

    Uses two ``np.random.rand()`` draws: one to pick a slot uniformly and
    one to choose between the slot and its alias.
    """
    K = len(J)

    # Pick a slot uniformly from 0 .. K-1.
    kk = int(np.floor(np.random.rand() * K))
    if np.random.rand() < q[kk]:
        return kk
    else:
        return J[kk]

#node2vec_walk：给定游走长度和起始节点，模拟一条从该节点出发的随机游走路径，用到的函数均已在上文给出

def node2vec_walk(self, walk_length, start_node):
    """Simulate a random walk starting from ``start_node``.

    :param walk_length: maximum number of nodes in the returned walk
    :param start_node: node the walk starts from
    :return: list of visited nodes, beginning with ``start_node``; shorter
        than ``walk_length`` if a node without neighbours is reached

    Reads ``self.alias_nodes`` and ``self.alias_edges``, which
    ``preprocess_transition_probs`` populates.
    """
    G = self.G
    alias_nodes = self.alias_nodes
    alias_edges = self.alias_edges

    walk = [start_node]

    while len(walk) < walk_length:
        cur = walk[-1]
        # Sorted so that positions match the order used to build the tables.
        cur_nbrs = sorted(G.neighbors(cur))
        if len(cur_nbrs) > 0:
            if len(walk) == 1:
                # First step: no previous node yet, use the per-node table.
                walk.append(cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])])
            else:
                # Later steps: use the table keyed by the (previous, current) edge.
                prev = walk[-2]
                next_node = cur_nbrs[alias_draw(alias_edges[(prev, cur)][0],
                                                alias_edges[(prev, cur)][1])]
                walk.append(next_node)
        else:
            # Dead end: the current node has no neighbours.
            break

    return walk

#### 图卷积神经网络（Graph Convolutional Networks）

CNN处理的图像或者视频数据中像素点（pixel）是排列成很整齐的矩阵。与之相对应，科学研究中还有很多Non Euclidean Structure的数据，社交网络、信息网络中有很多类似的结构。

CNN无法处理Non Euclidean Structure的数据，学术上的表达是传统的离散卷积（如问题1中所述）在Non Euclidean Structure的数据上无法保持平移不变性。通俗理解就是在拓扑图中每个顶点的相邻顶点数目都可能不同，那么当然无法用一个同样尺寸的卷积核来进行卷积运算。

#### 因子分解机

FM算法的pytorch实现：

class FM_model(nn.Module):
    """Second-order factorization machine.

    Output per sample: ``linear(x) + 0.5 * sum_f((x @ V^T)_f^2 - (x^2 @ (V^2)^T)_f)``,
    where ``V`` has shape (k, n).
    """

    def __init__(self, n, k):
        """
        :param n: number of input features
        :param k: dimension of the latent factor vectors
        """
        super(FM_model, self).__init__()
        self.n = n  # len(items) + len(users)
        self.k = k
        self.linear = nn.Linear(self.n, 1, bias=True)
        self.v = nn.Parameter(torch.randn(self.k, self.n))

    def fm_layer(self, x):
        """Compute the FM score for each row of ``x``.

        :param x: (batch, n) input tensor
        :return: (batch, 1) tensor of scores
        """
        linear_part = self.linear(x)  # (batch, 1)
        # (batch, n) @ (n, k) -> (batch, k)
        inter_part1 = torch.mm(x, self.v.t())
        # (batch, n)^2 @ (n, k)^2 -> (batch, k)
        inter_part2 = torch.mm(torch.pow(x, 2), torch.pow(self.v, 2).t())
        # Sum over the factor axis only, keeping the batch axis: a sum over
        # all elements would add the whole batch's interaction total to
        # every sample.
        interaction = torch.sum(torch.pow(inter_part1, 2) - inter_part2, dim=1, keepdim=True)
        output = linear_part + 0.5 * interaction
        return output  # out_size = (batch, 1)

    def forward(self, x):
        """Return the FM score of each sample in ``x`` as a (batch, 1) tensor."""
        output = self.fm_layer(x)
        return output

#### xDeepFM算法

CIN中一个神经元相关的接受域是垂直于特征维度D的整个平面，而CNN中的接受域是当前神经元周围的局部小范围区域，因此CIN中经过卷积操作得到的特征图（Feature Map）是一个向量，而不是一个矩阵。