## Loading required package: xergm.common
## Loading required package: ergm
## Loading required package: statnet.common
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## Loading required package: network
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## ergm: version 3.8.0, created on 2017-08-18
## Copyright (c) 2017, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## with contributions from
## Li Wang
## Kirk Li, University of Washington
## Skye Bender-deMoll, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: Versions before 3.6.1 had a bug in the implementation of the
## bd() constriant which distorted the sampled distribution somewhat.
## In addition, Sampson's Monks datasets had mislabeled vertices. See
## the NEWS and the documentation for more details.
##
## Attaching package: 'xergm.common'
## The following object is masked from 'package:ergm':
##
## gof
## Loading required package: ggplot2
## Package: btergm
## Version: 1.9.0
## Date: 2017-03-30
## Authors: Philip Leifeld (University of Glasgow)
## Skyler J. Cranmer (The Ohio State University)
## Bruce A. Desmarais (Pennsylvania State University)
## Version: 1.36.23
## Date: 2017-03-03
## Author: Philip Leifeld (University of Glasgow)
##
## Please cite the JSS article in your publications -- see citation("texreg").
Set parameters for the analysis and load the data into memory:
## analysis parameters
firm_i <- 'qualtrics' ## focal firm
d <- 2 ## ego network theshold (order)
## load RDS data file into memory as a list of networks
# data_file <- file.path(data_dir,sprintf('%s_d%s.rds',firm_i,d))
data_file <- file.path(data_dir, 'tutorial_d2_competition_network_sample.rds')
nets.all <- readRDS(data_file)
len <- length(nets.all)
## set number of time periods
nPeriods <- 8 ## any number from 3 to 11 is OK, but use 8 to compare results with example
## subset network periods
nets <- nets.all[(len-nPeriods+1):len]
net <- nets[[1]]
print(net)
## Network attributes:
## vertices = 180
## directed = FALSE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## mmc:
## 0 0.0833333333333333 0.125
## 32344 2 2
## 0.166666666666667 0.25 0.333333333333333
## 2 20 4
## 0.5
## 26
## dist: 180x180 matrix
## similarity: 180x180 matrix
## coop:
## 0
## 32400
## coop_bin:
## 0
## 32400
## coop_past:
## 0
## 32400
## coop_past_bin:
## 0
## 32400
## total edges= 92
## missing edges= 0
## non-missing edges= 92
##
## Vertex attribute names:
## acquired_name acquired_on acquired_uuid acquired_vids age category_group_list category_list cent_deg cent_eig cent_pow_n0_0 cent_pow_n0_1 cent_pow_n0_2 cent_pow_n0_3 cent_pow_n0_4 cent_pow_n0_5 city closed_on closed_year com_edgebetween com_fastgreedy com_infomap com_labelprop com_multilevel com_walktrap company_cusip company_cusip_6 company_gvkey company_name company_sic company_uuid constraint country_code domain employee_count founded_on founded_year genidx_edgebetween genidx_fastgreedy genidx_infomap genidx_labelprop genidx_multilevel genidx_walktrap id ipo_status njobs_edgebetween njobs_fastgreedy njobs_infomap njobs_labelprop njobs_multilevel njobs_walktrap orig.vid region state_code status_update vertex.names weight
##
## Edge attribute names:
## relation_began_on relation_ended_on weight
dim(net[,])
## [1] 180 180
print(net[1:4,1:4])
## 123contactform abroad101 actuate allegiance
## 123contactform 0 0 0 0
## abroad101 0 0 0 0
## actuate 0 0 0 0
## allegiance 0 0 0 0
## missing value strings to convert to <NA> type in imported data file
na.strings <- c('NA', 'na', '')
##====================================
## Add New Data from Owler Data Files
##------------------------------------
## new data directory name
ower_data_dir <- 'owler_data'
## new data directory
owler_dir <- file.path(data_dir,ower_data_dir)
## init new dataframes
vt <- data.frame() #vertices
el <- data.frame() #edge list
## loop over data files in data directory
for (file in dir(owler_dir, pattern = '\\.csv$')) {
cat(sprintf('\n\ndata file %s\n', file)) ## echo progress
## load data
full_file_path <- file.path(owler_dir, file)
df <- read.csv(full_file_path, stringsAsFactors = F, na.strings = na.strings)
## append verices
vt <- rbind(vt, df)
## select competitor column names of the form: "competitor_<number>"
cols <- names(df)
compcols <- cols[grep('competitor_\\d{1,}',x = cols)] ## \\d{1,} is integer of 1+ digits
## if no competitor data columns or missing company_name_unique column,
## then skip to next data file
if (length(compcols)==0 | !('company_name_unique' %in% names(df)))
next
## loop over firms in data file
for (i in 1:nrow(df)) {
## firm i
firm_i <- df[i, 'company_name_unique']
## select competitors of firm i
firm_i_comps <- unlist(df[i, compcols]) ## unlist from data.frame to vector
## skip if firm i has no company_name_unique
if (is.na(firm_i))
next
## skip rows with no competitors included
if (all(sapply(firm_i_comps, is.na)))
next
## loop over each competitor j of firm i
for (j in 1:length(firm_i_comps)) {
comp_j <- unname(firm_i_comps[j])
if (!is.na(comp_j)) {
tmp_el <- data.frame(source=firm_i, target=comp_j, rank=j, weight=1)
## append competitor relation
el <- rbind(el, tmp_el)
}
}
if (i %% 50 == 0) cat(sprintf('firm %s %s\n', i, firm_i)) ## echo progress
}
}
##
##
## data file Ford.csv
## firm 100 dodgeofftworth
## firm 150 physioroom
## firm 200 alldata
##
##
## data file NYTimes.csv
## firm 50 lex-machina
## firm 100 linkedin
## firm 150 marketaxess
## firm 200 electra-information-systems
## firm 250 iri
## firm 300 cable-one
## firm 350 salem-media-group
The target
column is not of the format like company_name_unique
which must be fixed or else the firm names cannot be matched against the vertex dataframe to create the graph data frame.
## check vertices
dim(vt)
## [1] 724 28
head(vt)
## name company_name_unique founded_year founded_date closed_date
## 1 Ford ford 1963 1963/6/16 <NA>
## 2 Toyota toyota-global 1903 1903/6/17 <NA>
## 3 Honda honda 1948 1948/9/24 <NA>
## 4 Chevrolet chevrolet 1911 1911/11/1 <NA>
## 5 Nissan nissan-global 1933 1933/12/26 <NA>
## 6 Hyundai Motor hyundai 1967 1967/12/29 <NA>
## acquired_date acquired_by_company_name_unique employees
## 1 <NA> <NA> 202,000
## 2 <NA> <NA> 369,124
## 3 <NA> <NA> 215,638
## 4 <NA> <NA> 20,000
## 5 <NA> <NA> 152,421
## 6 <NA> <NA> 63,099
## domain total_funding_usd annual_revenue
## 1 ford.com $756M $158.7B
## 2 toyota-global.com $ - $470.2B
## 3 honda.com $ - $138.9B
## 4 chevrolet.com $ - $11.5B
## 5 nissan-global.com $ - $106.1B
## 6 hyundai.com $ - $73.1B
## hq_region status ipo_date independence
## 1 DearbornMichigan Public 1956-01-01 Y
## 2 Toyota CityAichi Prefecture Public 1978-01-13 Y
## 3 Minato-kuTÅkyÅ Prefecture Public 1978-01-13 Y
## 4 Oshawa,Ontario Private <NA> Y
## 5 Yokohama-shiKanagawa Prefecture Private <NA> Y
## 6 SeoulSeoul Teugbyeolsi Public 2003-01-10 Y
## stock_symbol_1 stock_symbol_2 competitor_1 competitor_2 competitor_3
## 1 NYSE <NA> Toyota Honda Chevrolet
## 2 NYSE <NA> Nissan Honda Ford Motor
## 3 BMV <NA> Toyota Nissan Ford Motor
## 4 <NA> <NA> Hyundai Motor Ford Motor Kia
## 5 <NA> <NA> Hyundai Motor Acura Honda
## 6 Korea Exchange <NA> Nissan Mercedes-Benz Mazda Motor
## competitor_4 competitor_5 competitor_6 competitor_7
## 1 Nissan Hyundai Motor Volkswagen Volkswagen
## 2 Volkswagen General Motors Hyundai Motor BMW Group
## 3 Volkswagen Hyundai Motor General Motors Chrysler
## 4 Nissan Toyota Honda Volkswagen
## 5 Peugeot Toyota Motor Sales Volkswagen SAIC Motor
## 6 Honda Volkswagen Audi Toyota
## competitor_8 competitor_9 competitor_10 notes
## 1 General Motors NA NA <NA>
## 2 Tata Motors NA NA <NA>
## 3 <NA> NA NA <NA>
## 4 Chrysler NA NA <NA>
## 5 Toyota NA NA <NA>
## 6 General Motors NA NA <NA>
## check edge list
dim(el)
## [1] 4122 4
head(el, 20)
## source target rank weight
## 1 ford Toyota 1 1
## 2 ford Honda 2 1
## 3 ford Chevrolet 3 1
## 4 ford Nissan 4 1
## 5 ford Hyundai Motor 5 1
## 6 ford Volkswagen 6 1
## 7 ford Volkswagen 7 1
## 8 ford General Motors 8 1
## 9 toyota-global Nissan 1 1
## 10 toyota-global Honda 2 1
## 11 toyota-global Ford Motor 3 1
## 12 toyota-global Volkswagen 4 1
## 13 toyota-global General Motors 5 1
## 14 toyota-global Hyundai Motor 6 1
## 15 toyota-global BMW Group 7 1
## 16 toyota-global Tata Motors 8 1
## 17 honda Toyota 1 1
## 18 honda Nissan 2 1
## 19 honda Ford Motor 3 1
## 20 honda Volkswagen 4 1
We need to update target
column by using the mapping name
–>company_name_unique
that we already have in the vertex dataframe.
# company name to company_name_unique mapping
mapping <- vt[,c('name','company_name_unique')]
names(mapping) <- c('target','target_name_unique')
## merge the original edge list and company_name_unique mapping
el2 <- merge(el, mapping, by.x='target', by.y='target', all.x=T, all.y=F)
## replace original target column with new mapped target_name_unique
el2$target <- el2$target_name_unique
## finally remove the temporary target_name_unique column
el2$target_name_unique <- NULL
head(el2)
## target source rank weight
## 1 toyota-global bmwgroup 2 1
## 2 toyota-global nissan-global 8 1
## 3 toyota-global hyundai 7 1
## 4 toyota-global mercedes-benz 6 1
## 5 toyota-global volkswagenag 1 1
## 6 toyota-global tatamotors 4 1
## write edge list to csv file
el_file <- file.path(data_dir, 'owler_edge_list.csv')
write.csv(el, file = el_file, row.names = F)
## write vertex list to csv file
el_file <- file.path(data_dir, 'owler_vertex_list.csv')
write.csv(vt, file = el_file, row.names = F)