Last active
September 20, 2022 12:26
-
-
Save eliasdabbas/64d7ec4de62dc35e16dee7c4e0acc8cb to your computer and use it in GitHub Desktop.
Score internal links using two columns, "Source" and "Destination". This calculates various link-importance metrics, such as degree centrality, betweenness centrality, and PageRank.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
import pandas as pd | |
def score_links(links_file, domain):
    """Compute centrality and PageRank scores for the URLs in a link export.

    The CSV is read with pandas and narrowed to rows whose "Type" is
    "Hyperlink" and whose "Destination" contains ``domain`` as a literal
    substring (not a regular expression). Every remaining row becomes a
    directed edge Source -> Destination.

    links_file: Path to a CSV file with "Type", "Source" and "Destination"
                columns, e.g. ScreamingFrog's outlinks file.
    domain: Text that a link's destination must contain for it to be kept.

    Returns a DataFrame with one row per URL in the graph and the columns
    "url", "degree_centrality", "in_degree_centrality",
    "out_degree_centrality", "betweenness_centrality" and "pagerank".
    """
    raw_links = pd.read_csv(links_file)
    is_hyperlink = raw_links['Type'].eq('Hyperlink')
    hits_domain = raw_links['Destination'].str.contains(domain, regex=False)
    edge_frame = raw_links[is_hyperlink & hits_domain][['Source', 'Destination']]

    # Edges are added in file order, so nodes appear in first-seen order.
    G = nx.DiGraph()
    G.add_edges_from(tuple(pair) for pair in edge_frame.values)

    graph_df = pd.DataFrame({'url': G.nodes})

    # Insertion order of this dict fixes the column order of the result.
    metrics = {
        'degree_centrality': nx.degree_centrality(G),
        'in_degree_centrality': nx.in_degree_centrality(G),
        'out_degree_centrality': nx.out_degree_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G),
        'pagerank': nx.pagerank(G),
    }
    for column, scores in metrics.items():
        graph_df[column] = [scores[url] for url in graph_df['url']]
    return graph_df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment