ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

node2vec之小黑尝试

2022-01-10 14:59:07  阅读:190  来源: 互联网

标签:尝试 node node2vec self walk nx alias probs 小黑


参数设定

import warnings
import random
warnings.filterwarnings('ignore')
import argparse
import numpy as np
import networkx as nx
#import node2vec
from gensim.models import Word2Vec
import random
np.random.seed(1)
def parse_args():
    '''
    Parses the node2vec arguments.
    '''
    # 使用parser加载信息
    parser = argparse.ArgumentParser(description="Run node2vec.")
    # 输入文件 
    parser.add_argument('--input', nargs='?', default='../graph/karate.edgelist',
                        help='Input graph path')
    # 输出文件
    parser.add_argument('--output', nargs='?', default='../emb/karate.emb',
                        help='Embeddings path')
    # embedding维度
    parser.add_argument('--dimensions', type=int, default=128,
                        help='Number of dimensions. Default is 128.')
    # 节点序列长度
    parser.add_argument('--walk-length', type=int, default=80,
                        help='Length of walk per source. Default is 80.')
    # 随机游走的次数
    parser.add_argument('--num-walks', type=int, default=10,
                        help='Number of walks per source. Default is 10.')
    # word2vec窗口大小,word2vec参数
    parser.add_argument('--window-size', type=int, default=10,
                        help='Context size for optimization. Default is 10.')
    # SGD优化时epoch数量,word2vec参数
    parser.add_argument('--iter', default=10, type=int,
                        help='Number of epochs in SGD')
    # 并行化核数,word2vec参数
    parser.add_argument('--workers', type=int, default=8,
                        help='Number of parallel workers. Default is 8.')
    # 参数p
    parser.add_argument('--p', type=float, default=1,
                        help='Return hyperparameter. Default is 1.')
    # 参数q
    parser.add_argument('--q', type=float, default=1,
                        help='Inout hyperparameter. Default is 1.')
    # 权重
    parser.add_argument('--weighted', dest='weighted', action='store_true',
                        help='Boolean specifying (un)weighted. Default is unweighted.')
    parser.add_argument('--unweighted', dest='unweighted', action='store_false')
    parser.set_defaults(weighted=False)
    # 有向无向
    parser.add_argument('--directed', dest='directed', action='store_true',
                        help='Graph is (un)directed. Default is undirected.')
    parser.add_argument('--undirected', dest='undirected', action='store_false')
    parser.set_defaults(directed=False)
    
    return parser.parse_args(args=[])
    # return parser.parse_known_args()
def read_graph(file):
    graph_x = nx.read_edgelist(file,nodetype = int,create_using = nx.DiGraph())
    for edge in graph_x.edges():
        graph_x[edge[0]][edge[1]]['weight'] = 1
    # 无向操作
    if args.undirected:
        graph_x = graph_x.to_undirected()
    return graph_x
args = parse_args()
nx_G = read_graph(args.input)
nx.draw(nx_G,with_labels = True)
list(nx_G.neighbors(25))

[32, 28, 26]
在这里插入图片描述

采样算法

def alias_setup(probs):
    smaller = []
    larger = []
    Q = np.zeros(len(probs),dtype = int)
    P = np.zeros(len(probs))
    probs = [prob / sum(probs) for prob in probs]
    for t,prob in enumerate(probs):
        P[t] = prob * len(probs)
        if P[t] > 1:
            larger.append(t)
        else:
            smaller.append(t)
    # 准备开始采样
    while larger and smaller:
        large = larger.pop()
        small = smaller.pop()
        Q[small] = large
        P[large] -= (1 - P[small])
        if P[large] < 1:
            smaller.append(large)
        else:
            larger.append(large)
    return Q,P
#J,q = alias_setup([1/2,1/3,1/12,1/12]) 
def alias_draw(J, q):
    length = len(J)
    PP = int(np.floor(np.random.rand() * length))
    QQ = int(np.floor(np.random.rand()))
    if QQ <= q[PP]:
        return PP
    else:
        return J[QQ]

定义模型的Graph类

在这里插入图片描述

class Graph(object):
    def __init__(self,nx_G,is_directed, p, q):
        self.nx_G = nx_G
        self.is_directed = is_directed
        self.p = p
        self.q = q
    def get_alias_edge(self, src, dst):
        nx_G = self.nx_G
        p = self.p
        q = self.q
        unnormalized_probs = []
        for node in sorted(nx_G.neighbors(dst)):
            if node == src:
                prob = nx_G[dst][node]['weight'] / p
            elif nx_G.has_edge(node,src):
                prob = nx_G[dst][node]['weight']
            else:
                prob = nx_G[dst][node]['weight'] / q
            unnormalized_probs.append(prob)
        normalized_probs = [float(prob) / sum(unnormalized_probs) for prob in unnormalized_probs]
        #print(normalized_probs)
        return alias_setup(normalized_probs)
    def preprocess_transition_probs(self):
        is_directed = self.is_directed
        nx_G = self.nx_G
        # 结点 的采样映射
        alias_nodes = {}
        for node in nx_G.nodes():
            unnormalized_probs = [nx_G[node][neighbor]['weight'] for neighbor in sorted(nx_G.neighbors(node))]
            probs = [prob / sum(unnormalized_probs) for prob in unnormalized_probs]
            alias_nodes[node] = alias_setup(probs)
        # 边的采样映射
        alias_edges = {}
        for src,tgt in nx_G.edges():
            unnormalized_probs = self.get_alias_edge(src,tgt)
            alias_edges[(src,tgt)] = unnormalized_probs
            if not is_directed:
                unnormalized_probs = self.get_alias_edge(tgt,src)
                alias_edges[(tgt,src)] = unnormalized_probs
        self.alias_nodes = alias_nodes
        self.alias_edges = alias_edges
        return alias_nodes,alias_edges
    def node2vec_walk(self, walk_length, start_node):
        nx_G = self.nx_G
        alias_nodes = self.alias_nodes
        alias_edges = self.alias_edges
        walk = [start_node]
        while len(walk) < walk_length:
            cur = walk[-1]
            neighbors = sorted(nx_G.neighbors(cur))
            if neighbors:
                if len(walk) == 1:
                	# 只有一个结点的话就对点采样
                    q,p = alias_nodes[cur]
                    sample_index = alias_draw(q,p)
                    walk.append(neighbors[sample_index])
                else:
                	# 超过两个点,就进行边采样
                    pre = walk[-2]
                    q,p = alias_edges[(pre,cur)]
                    sample_index = alias_draw(q,p)
                    walk.append(neighbors[sample_index])
            else:
                break
        return walk
    def simulate_walks(self, num_walks, walk_length):
        nx_G = self.nx_G
        nodes = list(nx_G.nodes())
        walks = []
        for t in range(num_walks):
            random.shuffle(nodes)
            #print('epoch {}'.format(t))
            for node in nodes:
                walks.append(self.node2vec_walk(walk_length,node))
        return walks

word2vec训练部分代码

def learn_embeddings(walks):
    # 将node的类型int转化为string
    walk_lol = []
    for walk in walks:
        tmp = []
        for node in walk:
            tmp.append(str(node))
        walk_lol.append(tmp)
    model = Word2Vec(walk_lol,
                     size = 2,
                     window = args.window_size,
                     min_count = 0,
                     sg = 1,
                     workers = args.workers,
                     iter = args.iter)
    model.wv.save_word2vec_format(args.output)
    return model

聚类

# k-means聚类
from sklearn import  cluster
from sklearn.metrics import adjusted_rand_score
from sklearn.model_selection import train_test_split
import pandas as pd
def draw_cluster(p,q,pos):
    g = Graph(nx_G,False,p,q)
    g.preprocess_transition_probs()
    walks = g.simulate_walks(40,40)
    
    # 训练node2vec模型
    model = learn_embeddings(walks)
    
    # 导入节点名称,获取embedding
    embedding_node=[]
    for i in range(1,35):
        j=str(i)
        embedding_node.append(model[j])
    embedding_node=np.matrix(embedding_node).reshape((34,-1))
    y_pred = cluster.KMeans(n_clusters=4).fit_predict(embedding_node) # 调用 test_RandomForestClassifier
    print(y_pred)
    pos = nx.spring_layout(nx_G)
    nx.draw_networkx_nodes(nx_G,pos,node_color = y_pred,label = True)
    nx.draw_networkx_edges(nx_G,pos,nodelist = nx_G.edges())

实验效果

# p小 q大,偏向宽度优先搜索,模型更具有同构(注意黄色点,起着桥接的属性!!!)
p = 0.1
q = 20
draw_cluster(p,q)

[2 2 3 2 1 1 1 2 3 3 1 2 2 2 0 0 1 2 0 2 0 2 0 0 0 0 0 0 3 0 3 3 0 0]
在这里插入图片描述

# p大 q小,偏向深度优先搜索,模型更具有社群属性
p = 20
q = 0.1
draw_cluster(p,q)

[0 0 3 0 2 2 2 0 3 3 2 0 0 0 1 1 2 0 1 0 1 0 1 1 1 1 1 1 3 1 3 3 1 1]

在这里插入图片描述

标签:尝试,node,node2vec,self,walk,nx,alias,probs,小黑
来源: https://blog.csdn.net/qq_37418807/article/details/122410193

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有