Effects of product network relationships on demand in Russian ecommerce
This research analyzes the relationships among the values contained in the Product Recommendation Network and how they influence demand at an e-commerce retailer. We carried out an empirical analysis of the TV category at a major Russian e-commerce company.
Рубрика | Менеджмент и трудовые отношения |
Вид | дипломная работа |
Язык | английский |
Дата добавления | 27.08.2020 |
Размер файла | 3,4 M |
Отправить свою хорошую работу в базу знаний просто. Используйте форму, расположенную ниже
Студенты, аспиранты, молодые ученые, использующие базу знаний в своей учебе и работе, будут вам очень благодарны.
if "Купили более" in name:
salary_all_count = name.split()[2]
elif "за сегодня" in name and "покуп" in name:
salary_today_count = name.split()[0]
elif "за неделю" in name and "покуп" in name:
salary_week_count = name.split()[0]
print(
"ID:" + id + " Name:" + item_name + " Price:" + item_price + " Score:" + item_score + " Sale:" + item_sale + " salary all count:" + salary_all_count + " salary today count:" + salary_today_count + " salary week count:" + salary_week_count + " Reviews:" + reviews)
driver.close()
drivers.remove(driver)
data = ",".join(
[id, item_name, item_price, item_score, item_sale, salary_all_count, salary_today_count, salary_week_count,
reviews])
connect = sqlite3.connect("database.sqlite") # или :memory: чтобы сохранить в RAM
connect.cursor().execute("""INSERT INTO items VALUES (?,?)""", [id, data])
connect.commit()
connect.close()
return data + "," + type
def _find_with_retries(driver, selector, label, attempts=2, screenshot=None):
    """Query ``selector`` on ``driver``, re-querying up to ``attempts`` times.

    Between attempts the function optionally saves a screenshot, prints
    ``label`` and sleeps for one second. Returns the (possibly empty) list of
    matching elements.
    """
    elements = driver.find_elements_by_css_selector(selector)
    tries = 0
    while len(elements) == 0 and tries < attempts:
        tries += 1
        if screenshot is not None:
            driver.save_screenshot(screenshot)
        print(label)
        time.sleep(1)
        # Re-query after every pause; otherwise the wait can never succeed.
        elements = driver.find_elements_by_css_selector(selector)
    return elements


def parse(url, pack, reload=0):
    """Scrape one product page and append a row to ``data/data<pack>.csv``.

    The row holds the result of ``load`` for the main product followed by the
    results of ``load`` for every link found in the "recommends", "sponsored"
    and "also bought" widgets of the page.

    :param url: address of the product page to open.
    :param pack: suffix concatenated into the output CSV file name.
    :param reload: number of retries already made for this page; when a
        widget stays empty the page is re-parsed until ``reload`` exceeds 1,
        after which the widget is accepted as empty.
    :return: None.
    """
    print("---MAIN---")
    driver = webdriver.Firefox(options=options)
    drivers.append(driver)
    driver.set_window_size(1366, 9000)  # tall window: Firefox does not scroll to the element
    driver.implicitly_wait(wait_time)  # seconds
    driver.get(url)

    main_data = load(url, "main")
    if main_data is None:
        # Release the browser on this exit path too, as every other exit does.
        driver.close()
        drivers.remove(driver)
        return

    print("---RECOMMENDS---")
    recommends = _find_with_retries(
        driver,
        '[data-widget="skuShelfCompare"]>div>div>div>div>div>div>a',
        "recWhile")
    if len(recommends) == 0 and reload <= 1:
        # Widget never appeared: start over with a fresh browser.
        driver.close()
        drivers.remove(driver)
        parse(url, pack, reload + 1)
        return
    recommends_data = []
    for element in recommends:
        recommends_temp_data = load(element.get_property("href").split("?")[0], "recommends")
        recommends_data.append(recommends_temp_data)

    print("---SPONSORED---")
    sponsored = _find_with_retries(
        driver,
        '[data-widget="skuShelfGoods"][title="Спонсорские товары"] a',
        "sponsoredWhile",
        screenshot="sponsored_screen.png")
    if len(sponsored) == 0 and reload <= 1:
        driver.close()
        drivers.remove(driver)
        parse(url, pack, reload + 1)
        return
    sponsored_data = []
    for element in sponsored:
        sponsored_temp_data = load(element.get_property("href").split("?")[0], "sponsored")
        sponsored_data.append(sponsored_temp_data)

    print("---ALSO-BUYED---")
    also_buyed = driver.find_elements_by_css_selector(
        "#__nuxt>div>div.block-vertical>div:nth-child(6)>div>div:nth-child(2)>div>div:nth-child(4) a")
    also_buyed_data = []
    for element in also_buyed:
        also_buyed_data_temp = load(element.get_property("href").split("?")[0], "also_buy")
        also_buyed_data.append(also_buyed_data_temp)

    driver.close()
    drivers.remove(driver)

    # newline='' is what the csv module expects for files handed to csv.writer;
    # the with-block closes the file, so no explicit close() is needed.
    with open('data/data' + pack + '.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow([main_data] + recommends_data + sponsored_data + also_buyed_data)
def get_id(url):
    """Return the last non-empty piece of ``url`` after splitting on "-" and "/".

    For a product address such as
    ``https://www.ozon.ru/context/detail/id/154925584/`` this is the trailing
    numeric identifier. Raises ``IndexError`` when ``url`` contains no
    non-empty piece.
    """
    pieces = [piece for piece in re.split(r'[\-/]', url) if piece != '']
    return pieces[-1]
# parse("https://www.ozon.ru/context/detail/id/154925584/", "test")
Appendix B
Code of clustering data
ozon_tv <- read.csv("ozon_finalv3.csv", header = TRUE, sep = ",")

# Clustering analysis for main products ----
library(dplyr)   # data manipulation verbs and the %>% pipe
library(ISLR)    # loaded by the original script; not referenced by the code below
library(cluster) # daisy() for Gower dissimilarity, pam() for clustering
library(Rtsne)   # t-SNE embedding used for the cluster plot
library(ggplot2) # plotting

# Keep columns 2-8 of the raw table (drop column 1 and columns 9-17) and turn
# the two text columns into factors so daisy() handles them as nominal.
main_ozon_tv <- ozon_tv[, c(-1, -9:-17)]
main_ozon_tv$Brand <- as.factor(main_ozon_tv$Brand)
main_ozon_tv$Product.Name <- as.factor(main_ozon_tv$Product.Name)

# Gower dissimilarity; the third remaining column gets a log-ratio treatment.
main_ozon_dist <- daisy(main_ozon_tv, metric = "gower",
                        type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(main_ozon_dist)

# Full dissimilarity matrix
df_mat <- as.matrix(main_ozon_dist)

# Most similar pair: smallest dissimilarity above the matrix minimum
main_ozon_tv[which(df_mat == min(df_mat[df_mat != min(df_mat)]),
                   arr.ind = TRUE)[1, ], ]

# Most dissimilar pair, as selected by the original expression (largest value
# below the matrix maximum). The original labelled this output "most similar".
main_ozon_tv[which(df_mat == max(df_mat[df_mat != max(df_mat)]),
                   arr.ind = TRUE)[1, ], ]

# Choosing the number of clusters: average silhouette width of PAM for
# k = 2..12. Position 1 stays NA so that the index equals k.
sil_width <- c(
  NA_real_,
  vapply(2:12, function(k) {
    pam(main_ozon_dist, diss = TRUE, k = k)$silinfo$avg.width
  }, numeric(1))
)

# Plot silhouette width (higher is better)
plot(seq_along(sil_width), sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(seq_along(sil_width), sil_width)

# The author settled on 7 clusters after inspecting the plot.
# Cluster interpretation: fit PAM and show the medoid of every cluster.
pam_fit <- pam(main_ozon_dist, diss = TRUE, k = 7)
main_ozon_tv[pam_fit$medoids, ]

# Plotting the results. Rtsne() is stochastic, so the seed is fixed to make
# the picture reproducible between runs.
set.seed(42)
tsne_obj <- Rtsne(main_ozon_dist, is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(tsne_data, aes(x = X, y = Y)) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
main_ozon_tv$segments <- tsne_data$cluster

# Per-segment averages of the main-product variables
seg_main_ozon <- main_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_rating_main = mean(Rating_main),
    avg_price_main = mean(Price_main),
    avg_discout_main = mean(Discount_main),
    avg_review_main = mean(Reviews_main),
    avg_sales_main = mean(Sales_main)
  )

main_ozon_tv$Brand <- as.character(main_ozon_tv$Brand)
main_ozon_tv$Product.Name <- as.character(main_ozon_tv$Product.Name)

# Mean sales and row count per segment
seg_main_sales <- main_ozon_tv %>%
  group_by(segments) %>%
  count(mean(Sales_main)) %>%
  filter(n == max(n)) %>%
  ungroup()

# Most frequent brand(s) per segment
seg_main_brand <- main_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n)) %>%
  ungroup()

# The original referenced `seg_main`, which is never defined and stops the
# script; the segment summary built above is printed instead.
seg_main_ozon

# Final cluster description: most frequent product name(s) per segment
# together with the segment means.
seg_main_all <- main_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(Sales_main), mean(Price_main), mean(Rating_main),
        mean(Discount_main), mean(Reviews_main)) %>%
  filter(n == max(n)) %>%
  ungroup()
main_ozon_tv

# Number of products in each segment
main_ozon_tv %>% count(segments)
# Clustering analysis for also viewed products ----
library(dplyr)   # data manipulation verbs and the %>% pipe
library(ISLR)    # loaded by the original script; not referenced by the code below
library(cluster) # daisy() for Gower dissimilarity, pam() for clustering
library(Rtsne)   # t-SNE embedding used for the cluster plot
library(ggplot2) # plotting

# Keep columns 2-3 and 9-13 of the raw table (drop 1, 4-8 and 14-17) and turn
# the two text columns into factors so daisy() handles them as nominal.
av_ozon_tv <- ozon_tv[, c(-1, -4:-8, -14:-17)]
av_ozon_tv$Brand <- as.factor(av_ozon_tv$Brand)
av_ozon_tv$Product.Name <- as.factor(av_ozon_tv$Product.Name)

# Gower dissimilarity; the third remaining column gets a log-ratio treatment.
av_ozon_dist <- daisy(av_ozon_tv, metric = "gower",
                      type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(av_ozon_dist)

# Full dissimilarity matrix
av_df_mat <- as.matrix(av_ozon_dist)

# Most similar pair: smallest dissimilarity above the matrix minimum
av_ozon_tv[which(av_df_mat == min(av_df_mat[av_df_mat != min(av_df_mat)]),
                 arr.ind = TRUE)[1, ], ]

# Most dissimilar pair, as selected by the original expression (largest value
# below the matrix maximum). The original labelled this output "most similar".
av_ozon_tv[which(av_df_mat == max(av_df_mat[av_df_mat != max(av_df_mat)]),
                 arr.ind = TRUE)[1, ], ]

# Choosing the number of clusters: average silhouette width of PAM for
# k = 2..10. Position 1 stays NA so that the index equals k.
sil_width <- c(
  NA_real_,
  vapply(2:10, function(k) {
    pam(av_ozon_dist, diss = TRUE, k = k)$silinfo$avg.width
  }, numeric(1))
)

# Plot silhouette width (higher is better)
plot(seq_along(sil_width), sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(seq_along(sil_width), sil_width)

# The author settled on 7 clusters after inspecting the plot.
# Cluster interpretation: fit PAM and show the medoid of every cluster.
pam_fit <- pam(av_ozon_dist, diss = TRUE, k = 7)
av_ozon_tv[pam_fit$medoids, ]

# Plotting the results. Rtsne() is stochastic, so the seed is fixed to make
# the picture reproducible between runs.
set.seed(42)
av_tsne_fit <- Rtsne(av_ozon_dist, is_distance = TRUE)
av_tsne_obj <- av_tsne_fit$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(av_tsne_obj, aes(x = X, y = Y)) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
av_ozon_tv$segments <- av_tsne_obj$cluster

# Per-segment averages of the also-viewed variables
seg_av_ozon <- av_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_sales_av = mean(SAL_av),
    avg_price_av = mean(AP_av),
    avg_rating_av = mean(AR_av),
    avg_discount_av = mean(AD_av),
    avg_number_of_reviews_av = mean(ANR_av)
  )

av_ozon_tv$Brand <- as.character(av_ozon_tv$Brand)
av_ozon_tv$Product.Name <- as.character(av_ozon_tv$Product.Name)

# Mean also-viewed sales and row count per segment
seg_av_sales <- av_ozon_tv %>%
  group_by(segments) %>%
  count(mean(SAL_av)) %>%
  filter(n == max(n)) %>%
  ungroup()

# Most frequent brand(s) per segment
seg_av_brand <- av_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n)) %>%
  ungroup()

# Final cluster description: most frequent product name(s) per segment
# together with the segment means.
seg_av_all <- av_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(SAL_av), mean(AP_av), mean(AR_av), mean(AD_av)) %>%
  filter(n == max(n)) %>%
  ungroup()
av_ozon_tv

# Number of products in each segment
av_ozon_tv %>% count(segments)
# Regression on segment-level averages ----
# Join the main-product and also-viewed segment summaries on the one column
# they share, the segment label.
# NOTE(review): the two labels come from separate PAM fits, so segment "1" in
# one table is not necessarily related to segment "1" in the other - confirm
# that pairing them by label is intended.
seg_lm_ozon <- merge(seg_main_ozon, seg_av_ozon, by = "segments")

library(car)

# NOTE(review): both clusterings use k = 7, so this model estimates eight
# coefficients from at most seven rows and cannot be fully identified.
LM_main <- lm(
  avg_sales_main ~ avg_review_main + avg_rating_main + avg_sales_av +
    avg_price_av + avg_rating_av + avg_discount_av + avg_number_of_reviews_av,
  data = seg_lm_ozon
)
summary(LM_main)
# Clustering analysis for co-purchased products ----
library(dplyr)   # data manipulation verbs and the %>% pipe
library(ISLR)    # loaded by the original script; not referenced by the code below
library(cluster) # daisy() for Gower dissimilarity, pam() for clustering
library(Rtsne)   # t-SNE embedding used for the cluster plot
library(ggplot2) # plotting

# Keep columns 2-3 and 15-17 of the raw table (drop 1 and 4-14). Per the
# original author's note the dropped range includes the binary co-purchase
# column.
cp_ozon_tv <- ozon_tv[, c(-1, -4:-14)]
cp_ozon_tv$Brand <- as.factor(cp_ozon_tv$Brand)
cp_ozon_tv$Product.Name <- as.factor(cp_ozon_tv$Product.Name)

# Gower dissimilarity; the third remaining column gets a log-ratio treatment.
cp_ozon_dist <- daisy(cp_ozon_tv, metric = "gower",
                      type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(cp_ozon_dist)

# Full dissimilarity matrix
cp_df_mat <- as.matrix(cp_ozon_dist)

# Most similar pair: smallest dissimilarity above the matrix minimum
cp_ozon_tv[which(cp_df_mat == min(cp_df_mat[cp_df_mat != min(cp_df_mat)]),
                 arr.ind = TRUE)[1, ], ]

# Most dissimilar pair, as selected by the original expression (largest value
# below the matrix maximum). The original labelled this output "most similar".
cp_ozon_tv[which(cp_df_mat == max(cp_df_mat[cp_df_mat != max(cp_df_mat)]),
                 arr.ind = TRUE)[1, ], ]

# Choosing the number of clusters: average silhouette width of PAM for
# k = 2..15. Position 1 stays NA so that the index equals k.
sil_width <- c(
  NA_real_,
  vapply(2:15, function(k) {
    pam(cp_ozon_dist, diss = TRUE, k = k)$silinfo$avg.width
  }, numeric(1))
)

# Plot silhouette width (higher is better)
plot(seq_along(sil_width), sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(seq_along(sil_width), sil_width)

# The author settled on 11 clusters after inspecting the plot.
# Cluster interpretation. The original fitted PAM on av_ozon_dist here (a
# copy-paste slip), so medoids and segments described the also-viewed
# clustering; the co-purchase dissimilarities are used instead.
pam_fit <- pam(cp_ozon_dist, diss = TRUE, k = 11)
cp_ozon_tv[pam_fit$medoids, ]

# Plotting the results. Rtsne() is stochastic, so the seed is fixed to make
# the picture reproducible between runs.
set.seed(42)
cp_tsne_fit <- Rtsne(cp_ozon_dist, is_distance = TRUE)
cp_tsne_obj <- cp_tsne_fit$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(cp_tsne_obj, aes(x = X, y = Y)) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
cp_ozon_tv$segments <- cp_tsne_obj$cluster

# Per-segment averages of the co-purchase variables. The original named these
# columns with an "_av" suffix although they are computed from the "_cp"
# columns; they are labelled "_cp" here.
seg_cp_ozon <- cp_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_price_cp = mean(AP_cp),
    avg_rating_cp = mean(AR_cp),
    avg_number_of_reviews_cp = mean(ANR_cp)
  )

cp_ozon_tv$Brand <- as.character(cp_ozon_tv$Brand)
cp_ozon_tv$Product.Name <- as.character(cp_ozon_tv$Product.Name)

# Mean co-purchase price and row count per segment
seg_cp_price <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(mean(AP_cp)) %>%
  filter(n == max(n)) %>%
  ungroup()

# Most frequent brand(s) per segment
seg_cp_brand <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n)) %>%
  ungroup()

# Final cluster description: most frequent product name(s) per segment
# together with the segment means.
seg_cp_all <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(AP_cp), mean(AR_cp), mean(ANR_cp)) %>%
  filter(n == max(n)) %>%
  ungroup()
cp_ozon_tv

# Number of products in each segment
cp_ozon_tv %>% count(segments)
# First regression attempts ----
library(car)

# Control model: sales explained by the main product's own attributes only.
LM_control <- lm(
  Sales_main ~ Price_main + Rating_main + Discount_main + Reviews_main,
  data = ozon_tv
)
summary(LM_control)

# Full models: each response regressed on every remaining column except the
# identifier, the two text columns and Co_purchase.
LM_sales <- lm(
  Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_sales)

LM_rating <- lm(
  Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_rating)

LM_reviews <- lm(
  Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_reviews)

# Log-log model of sales on discount.
# NOTE(review): log() of a zero value is -Inf and makes lm() fail - confirm
# that neither column contains zeros.
LogM_sales <- lm(log(Sales_main) ~ log(Discount_main), data = ozon_tv)
summary(LogM_sales)
# Descriptive statistics ----
# Install pastecs only when it is missing instead of on every run.
if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
library(pastecs)

# stat.desc() returns a data frame with one row per statistic and one column
# per variable; columns 4-13 of the raw table are described.
descriptive <- stat.desc(ozon_tv[, 4:13])
str(descriptive)

# Full-precision table, then a rounded copy for reporting. The original
# discarded the result of round() and also called table(descriptive), which
# cross-tabulates all ten columns against each other and is not a usable
# summary; that call is dropped.
descriptive
descriptive_rounded <- round(descriptive, 2)
descriptive_rounded
# Exploratory plots ----
# Install ggpubr only when it is missing instead of on every run.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library(ggpubr)

# Box plots of the main-product variables
ggboxplot(ozon_tv, y = "Rating_main", width = 0.5)
ggboxplot(ozon_tv, y = "Reviews_main", width = 0.5)
ggboxplot(ozon_tv, y = "Discount_main", width = 0.5)
ggboxplot(ozon_tv, y = "Price_main", width = 0.5)
ggboxplot(ozon_tv, y = "Sales_main", width = 0.5)

# Histograms (nine bins, mean marked)
gghistogram(ozon_tv, x = "Sales_main", bins = 9, add = "mean")
gghistogram(ozon_tv, x = "Rating_main", bins = 9, add = "mean")
gghistogram(ozon_tv, x = "Discount_main", bins = 9, add = "mean")

# Q-Q plots
ggqqplot(ozon_tv, x = "Reviews_main")
ggqqplot(ozon_tv, x = "Rating_main")
ggqqplot(ozon_tv, x = "Sales_main")

# Sales by brand
ozon_brands <- read.csv("Ozon_brands.csv", header = TRUE, sep = ",")
ggboxplot(ozon_brands, x = "Brand", y = "Sales_main", color = "Brand")
ggstripchart(ozon_brands, x = "Brand", y = "Sales_main",
             color = "Brand", add = "mean_sd")

# The original bar plot was leftover example code: it used `df` (not a data
# frame in this script) and the columns "Brands", "Sales" and "Eye", none of
# which are used anywhere else here. It now draws mean sales per brand from
# ozon_brands; the fixed four-colour palette is dropped because the number of
# brands is not known at this point.
ggbarplot(ozon_brands, x = "Brand", y = "Sales_main",
          color = "Brand", add = "mean_se")
# Fitting the model: regressions on the outlier-adjusted files ----
# NOTE(review): the section titles say "without outliers", so the two CSV
# files presumably hold the data after outlier removal - confirm.

# First attempt
ozon_outliers <- read.csv("Ozon_outliers.csv", header = TRUE, sep = ",")

LM_sales_outliers <- lm(
  Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_sales_outliers)

LM_rating_outliers <- lm(
  Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_rating_outliers)

LM_reviews_outliers <- lm(
  Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_reviews_outliers)

# Second attempt
ozon_outliers2 <- read.csv("Ozon_outliers2.csv", header = TRUE, sep = ",")

LM_sales_outliers2 <- lm(
  Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_sales_outliers2)

LM_rating_outliers2 <- lm(
  Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_rating_outliers2)

LM_reviews_outliers2 <- lm(
  Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_reviews_outliers2)
Размещено на Allbest.ru
...Подобные документы
Origins of and reasons for product placement: history of product placement in the cinema, sponsored shows. Factors that can influence the cost of a placement. Branded entertainment in all its forms: series and television programs, novels and plays.
курсовая работа [42,1 K], добавлен 16.10.2013Improving the business processes of customer relationship management through automation. Solutions the problem of the absence of automation of customer related business processes. Develop templates to support ongoing processes of customer relationships.
реферат [173,6 K], добавлен 14.02.2016Оргтехника как основа для работы офиса, ее типы и функциональные особенности, значение. Необходимость использования компьютера, ее обоснование. Информационные системы в управлении и принципы их формирования. Модели продаж CRM-систем On-demand (или SaaS).
курсовая работа [1,6 M], добавлен 01.04.2012Evaluation of urban public transport system in Indonesia, the possibility of its effective development. Analysis of influence factors by using the Ishikawa Cause and Effect diagram and also the use of Pareto analysis. Using business process reengineering.
контрольная работа [398,2 K], добавлен 21.04.2014The impact of management and leadership styles on strategic decisions. Creating a leadership strategy that supports organizational direction. Appropriate methods to review current leadership requirements. Plan for the development of future situations.
курсовая работа [36,2 K], добавлен 20.05.2015Selected aspects of stimulation of scientific thinking. Meta-skills. Methods of critical and creative thinking. Analysis of the decision-making methods without use of numerical values of probability (exemplificative of the investment projects).
аттестационная работа [196,7 K], добавлен 15.10.2008Рассмотрение концепции Customer Relationship Management по управлению взаимоотношениями с клиентами. Возможности CRM-систем, их влияние на эффективность бизнеса. Разработка, реализация и стоимость проекта внедрения CRM-системы для ЗАО "Сибтехнология".
дипломная работа [5,5 M], добавлен 15.09.2012Critical literature review. Apparel industry overview: Porter’s Five Forces framework, PESTLE, competitors analysis, key success factors of the industry. Bershka’s business model. Integration-responsiveness framework. Critical evaluation of chosen issue.
контрольная работа [29,1 K], добавлен 04.10.2014Impact of globalization on the way organizations conduct their businesses overseas, in the light of increased outsourcing. The strategies adopted by General Electric. Offshore Outsourcing Business Models. Factors for affect the success of the outsourcing.
реферат [32,3 K], добавлен 13.10.2011Major factors of success of managers. Effective achievement of the organizational purposes. Use of "emotional investigation". Providing support to employees. That is appeal charisma. Positive morale and recognition. Feedback of the head with workers.
презентация [1,8 M], добавлен 15.07.2012Сущность CRM-систем - Customer Relationship Management. Преимущества клиенториентированного подхода к бизнесу. Формы функционирования и классификация CRM-систем. Основные инструменты, которые включает в себя технология управления отношениями с клиентами.
реферат [30,9 K], добавлен 12.01.2011Six principles of business etiquette survival or success in the business world. Punctuality, privacy, courtesy, friendliness and affability, attention to people, appearance, literacy speaking and writing as the major commandments of business man.
презентация [287,1 K], добавлен 21.10.2013Discussion of organizational culture. The major theories of personality. Social perception, its elements and common barriers. Individual and organizational influences on ethical behavior. The psychophysiology of the stress response.
контрольная работа [27,7 K], добавлен 19.11.2012Relevance of electronic document flow implementation. Description of selected companies. Pattern of ownership. Sectorial branch. Company size. Resources used. Current document flow. Major advantage of the information system implementation in the work.
курсовая работа [128,1 K], добавлен 14.02.2016Понятие и сущность мотивации трудовой деятельности персонала. Особенности применения методов стимулирования в коммерческих организациях на примере Levi’s Russia. Методы нематериального стимулирования персонала. Вклад сотрудника в прибыль компании.
курсовая работа [27,8 K], добавлен 15.05.2014Searching for investor and interaction with him. Various problems in the project organization and their solutions: design, page-proof, programming, the choice of the performers. Features of the project and the results of its creation, monetization.
реферат [22,0 K], добавлен 14.02.2016Analysis of the peculiarities of the mobile applications market. The specifics of the process of mobile application development. Systematization of the main project management methodologies. Decision of the problems of use of the classical methodologies.
контрольная работа [1,4 M], добавлен 14.02.2016Description of the structure of the airline and the structure of its subsystems. Analysis of the main activities of the airline, other goals. Building the “objective tree” of the airline. Description of the environmental features of the transport company.
курсовая работа [1,2 M], добавлен 03.03.2013The concept and features of bankruptcy. Methods prevent bankruptcy of Russian small businesses. General characteristics of crisis management. Calculating the probability of bankruptcy discriminant function in the example of "Kirov Plant "Mayak".
курсовая работа [74,5 K], добавлен 18.05.2015Value and probability weighting function. Tournament games as special settings for a competition between individuals. Model: competitive environment, application of prospect theory. Experiment: design, conducting. Analysis of experiment results.
курсовая работа [1,9 M], добавлен 20.03.2016