Effects of product network relationships on demand in Russian ecommerce

This research analyzes the relationships among the values contained in the Product Recommendation Network and how they influence demand at an e-commerce retailer. We carried out an empirical analysis of the TV category at a major Russian e-commerce retailer.

Рубрика Менеджмент и трудовые отношения
Вид дипломная работа
Язык английский
Дата добавления 27.08.2020
Размер файла 3,4 M

Отправить свою хорошую работу в базу знаний просто. Используйте форму, расположенную ниже

Студенты, аспиранты, молодые ученые, использующие базу знаний в своей учебе и работе, будут вам очень благодарны.

if "Купили более" in name:

salary_all_count = name.split()[2]

elif "за сегодня" in name and "покуп" in name:

salary_today_count = name.split()[0]

elif "за неделю" in name and "покуп" in name:

salary_week_count = name.split()[0]

print(

"ID:" + id + " Name:" + item_name + " Price:" + item_price + " Score:" + item_score + " Sale:" + item_sale + " salary all count:" + salary_all_count + " salary today count:" + salary_today_count + " salary week count:" + salary_week_count + " Reviews:" + reviews)

driver.close()

drivers.remove(driver)

data = ",".join(

[id, item_name, item_price, item_score, item_sale, salary_all_count, salary_today_count, salary_week_count,

reviews])

connect = sqlite3.connect("database.sqlite") # или :memory: чтобы сохранить в RAM

connect.cursor().execute("""INSERT INTO items VALUES (?,?)""", [id, data])

connect.commit()

connect.close()

return data + "," + type

def _release_driver(driver):
    """Close the browser window and remove the driver from the global ``drivers`` list."""
    driver.close()
    drivers.remove(driver)


def _poll_for_elements(driver, selector, label, screenshot=None, attempts=2):
    """Find elements by CSS selector, re-querying while nothing matches.

    :param driver: Selenium driver with the product page already loaded.
    :param selector: CSS selector passed to ``find_elements_by_css_selector``.
    :param label: text printed before each extra attempt (progress trace).
    :param screenshot: optional file name; when given, a screenshot is saved
        before each extra attempt.
    :param attempts: number of extra look-ups, one second apart.
    :return: the matched elements; an empty list if nothing ever matched.
    """
    elements = driver.find_elements_by_css_selector(selector)
    for _ in range(attempts):
        if elements:
            break
        if screenshot is not None:
            driver.save_screenshot(screenshot)
        print(label)
        time.sleep(1)
        # Re-query on every attempt: without this the wait could never
        # succeed, because the list being tested would never change.
        elements = driver.find_elements_by_css_selector(selector)
    return elements


def parse(url, pack, reload=0):
    """Scrape one product page and append a CSV row with it and its linked products.

    The page is opened in a new Firefox driver. ``load`` is called for the
    product itself and for every link found in the "recommends", "sponsored"
    and "also bought" widgets; the values it returns are written as a single
    ';'-delimited row to ``data/data<pack>.csv``.

    :param url: product page URL.
    :param pack: suffix of the output CSV file name.
    :param reload: how many times this page has already been re-opened. While
        it is 0 or 1, a missing recommends/sponsored widget closes the browser
        and restarts the whole page; after that, the widget is recorded as
        empty instead.
    :return: None.
    """
    # NOTE(review): find_elements_by_css_selector is the legacy Selenium
    # spelling; confirm the pinned Selenium version still provides it.
    print("---MAIN---")
    driver = webdriver.Firefox(options=options)
    drivers.append(driver)
    driver.set_window_size(1366, 9000)  # tall window, because Firefox does not scroll to the element
    driver.implicitly_wait(wait_time)  # seconds
    driver.get(url)

    main_data = load(url, "main")
    if main_data is None:
        # Previously this path returned with the browser still open.
        _release_driver(driver)
        return

    print("---RECOMMENDS---")
    recommends = _poll_for_elements(
        driver,
        '[data-widget="skuShelfCompare"]>div>div>div>div>div>div>a',
        "recWhile",
    )
    if not recommends and reload <= 1:
        # Close this browser before recursing so two are never open at once.
        _release_driver(driver)
        parse(url, pack, reload + 1)
        return
    recommends_data = [
        load(element.get_property("href").split("?")[0], "recommends")
        for element in recommends
    ]

    print("---SPONSORED---")
    sponsored = _poll_for_elements(
        driver,
        '[data-widget="skuShelfGoods"][title="Спонсорские товары"] a',
        "sponsoredWhile",
        screenshot="sponsored_screen.png",
    )
    if not sponsored and reload <= 1:
        _release_driver(driver)
        parse(url, pack, reload + 1)
        return
    sponsored_data = [
        load(element.get_property("href").split("?")[0], "sponsored")
        for element in sponsored
    ]

    print("---ALSO-BUYED---")
    # No retry here: an empty "also bought" block is simply recorded as empty.
    also_buyed = driver.find_elements_by_css_selector(
        "#__nuxt>div>div.block-vertical>div:nth-child(6)>div>div:nth-child(2)>div>div:nth-child(4) a")
    also_buyed_data = [
        load(element.get_property("href").split("?")[0], "also_buy")
        for element in also_buyed
    ]

    _release_driver(driver)

    # newline='' is what the csv module asks for; the ``with`` block closes
    # the file, so no explicit close() is needed.
    with open('data/data' + pack + '.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow([main_data] + recommends_data + sponsored_data + also_buyed_data)

def get_id(url):
    """Return the last non-empty piece of ``url`` after splitting it on '-' and '/'."""
    pieces = [piece for piece in re.split(r'[\-/]', url) if piece != '']
    return pieces[-1]

# parse("https://www.ozon.ru/context/detail/id/154925584/", "test")

Appendix B

Code for clustering the data

ozon_tv <- read.csv("ozon_finalv3.csv", header = TRUE, sep = ",")

# Clustering analysis for main products ----

library(dplyr)   # pipes, group_by(), summarise(), count()
library(ISLR)    # attached by the original script; not referenced in this section
library(cluster) # daisy() and pam()
library(Rtsne)   # t-SNE embedding
library(ggplot2) # plotting

# Keep only the columns used for the main-product clustering: drop column 1
# and columns 9-17 (presumably the ID and the related-product variables;
# confirm against the CSV header).
main_ozon_tv <- ozon_tv[, c(-1, -9:-17)]
main_ozon_tv$Brand <- as.factor(main_ozon_tv$Brand)
main_ozon_tv$Product.Name <- as.factor(main_ozon_tv$Product.Name)

# Gower dissimilarity copes with the mix of factor and numeric columns; the
# third column is treated as log-ratio scaled.
main_ozon_dist <- daisy(main_ozon_tv, metric = "gower", type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(main_ozon_dist)

# Dissimilarities as a plain matrix
df_mat <- as.matrix(main_ozon_dist)

# Print one pair of rows whose dissimilarity is the largest value strictly
# below the overall maximum. The original comment called this the "most
# similar pair", but max() selects a highly DISSIMILAR pair; swap both max()
# calls for min() if the most similar pair is what is wanted.
main_ozon_tv[
  which(df_mat == max(df_mat[df_mat != max(df_mat)]), arr.ind = TRUE)[1, ],
]

# Choosing the number of clusters: average silhouette width of PAM for k = 2..12
# (position 1 stays NA because a single cluster has no silhouette)
sil_width <- rep(NA_real_, 12)
for (i in 2:12) {
  pam_fit <- pam(main_ozon_dist, diss = TRUE, k = i)
  sil_width[i] <- pam_fit$silinfo$avg.width
}

# Plot silhouette width (higher is better)
plot(1:12, sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(1:12, sil_width)

# Cluster interpretation: the author settled on 7 clusters from the plot above
pam_fit <- pam(main_ozon_dist, diss = TRUE, k = 7)

# Rows acting as cluster medoids
main_ozon_tv[pam_fit$medoids, ]

# Plotting the results: 2-D t-SNE embedding of the dissimilarities, coloured
# by PAM cluster.
# NOTE(review): Rtsne is stochastic and no seed is set, so the picture changes
# between runs; the cluster assignment itself comes from pam().
tsne_obj <- Rtsne(main_ozon_dist, is_distance = TRUE)

tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(aes(x = X, y = Y), data = tsne_data) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
main_ozon_tv$segments <- tsne_data$cluster

# Per-segment averages of the main-product variables.
# (Column name avg_discout_main is kept as spelled so downstream code that
# refers to it keeps working.)
seg_main_ozon <- main_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_rating_main = mean(Rating_main),
    avg_price_main = mean(Price_main),
    avg_discout_main = mean(Discount_main),
    avg_review_main = mean(Reviews_main),
    avg_sales_main = mean(Sales_main)
  )

main_ozon_tv$Brand <- as.character(main_ozon_tv$Brand)
main_ozon_tv$Product.Name <- as.character(main_ozon_tv$Product.Name)

# Mean sales and row count per segment
seg_main_sales <- main_ozon_tv %>%
  group_by(segments) %>%
  count(mean(Sales_main)) %>%
  filter(n == max(n))

# Most frequent brand(s) per segment
seg_main_brand <- main_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n))

# The original evaluated `seg_main` here, an object that is never created, so
# the script stopped with "object 'seg_main' not found". Show the per-segment
# summary instead.
seg_main_ozon

# Final cluster table: most frequent product name(s) per segment together with
# the segment means
seg_main_all <- main_ozon_tv %>%
  group_by(segments) %>%
  count(
    Product.Name,
    mean(Sales_main),
    mean(Price_main),
    mean(Rating_main),
    mean(Discount_main),
    mean(Reviews_main)
  ) %>%
  filter(n == max(n))

main_ozon_tv

# Number of rows in each segment
main_ozon_tv %>%
  group_by(segments) %>%
  tally()

# Clustering analysis for also viewed products ----

library(dplyr)   # pipes, group_by(), summarise(), count()
library(ISLR)    # attached as in the rest of the script
library(cluster) # daisy() and pam()
library(Rtsne)   # t-SNE embedding
library(ggplot2) # plotting

# Column subset for the also-viewed analysis: everything except column 1,
# columns 4-8 and columns 14-17
av_ozon_tv <- ozon_tv[, c(-1, -4:-8, -14:-17)]
av_ozon_tv$Brand <- as.factor(av_ozon_tv$Brand)
av_ozon_tv$Product.Name <- as.factor(av_ozon_tv$Product.Name)

# Gower dissimilarity; the third column is treated as log-ratio scaled
av_ozon_dist <- daisy(
  av_ozon_tv,
  metric = "gower",
  type = list(logratio = 3)
)

# Inspect the dissimilarity object
summary(av_ozon_dist)

# Dissimilarities as a plain matrix
av_df_mat <- as.matrix(av_ozon_dist)

# Print one pair of rows whose dissimilarity equals the largest value that is
# strictly below the overall maximum
av_ozon_tv[
  which(
    av_df_mat == max(av_df_mat[av_df_mat != max(av_df_mat)]),
    arr.ind = TRUE
  )[1, ],
]

# Average silhouette width of PAM for k = 2..10 (position 1 stays NA)
sil_width <- c(NA)
for (i in 2:10) {
  pam_fit <- pam(av_ozon_dist, diss = TRUE, k = i)
  sil_width[i] <- pam_fit$silinfo$avg.width
}

# Silhouette width against the number of clusters (higher is better)
plot(1:10, sil_width, xlab = "Number of clusters", ylab = "Silhouette Width")
lines(1:10, sil_width)

# Final PAM fit with the 7 clusters chosen by the author
pam_fit <- pam(av_ozon_dist, diss = TRUE, k = 7)

# Rows acting as cluster medoids
av_ozon_tv[pam_fit$medoids, ]

# 2-D t-SNE embedding of the dissimilarities, labelled with the PAM clusters.
# The name av_tsne_obj is first the Rtsne result and then the plotting frame.
av_tsne_obj <- Rtsne(av_ozon_dist, is_distance = TRUE)
av_tsne_obj <- av_tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(aes(x = X, y = Y), data = av_tsne_obj) +
  geom_point(aes(color = cluster))

# Attach the cluster label to every row
av_ozon_tv$segments <- av_tsne_obj$cluster

# Per-segment averages of the also-viewed variables
seg_av_ozon <- av_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_sales_av = mean(SAL_av),
    avg_price_av = mean(AP_av),
    avg_rating_av = mean(AR_av),
    avg_discount_av = mean(AD_av),
    avg_number_of_reviews_av = mean(ANR_av)
  )

av_ozon_tv$Brand <- as.character(av_ozon_tv$Brand)
av_ozon_tv$Product.Name <- as.character(av_ozon_tv$Product.Name)

# Mean sales and row count per segment
seg_av_sales <- av_ozon_tv %>%
  group_by(segments) %>%
  count(mean(SAL_av)) %>%
  filter(n == max(n))

# Most frequent brand(s) per segment
seg_av_brand <- av_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n))

# Final cluster table: most frequent product name(s) per segment with the
# segment means
seg_av_all <- av_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(SAL_av), mean(AP_av), mean(AR_av), mean(AD_av)) %>%
  filter(n == max(n))

av_ozon_tv

# Number of rows in each segment
av_ozon_tv %>%
  group_by(segments) %>%
  tally()

# Join the main-product and also-viewed segment summaries on their shared
# column(s)
seg_lm_ozon <- merge(seg_main_ozon, seg_av_ozon)

# Segment-level regression of main-product sales on the segment averages.
# NOTE(review): with 7 segments on each side the merged frame has at most
# 7 rows, fewer than the 8 coefficients requested here - confirm this model
# is meaningful.
library(car)

LM_main <- lm(
  avg_sales_main ~ avg_review_main + avg_rating_main + avg_sales_av +
    avg_price_av + avg_rating_av + avg_discount_av + avg_number_of_reviews_av,
  data = seg_lm_ozon
)
summary(LM_main)

# Clustering analysis for co-purchased products ----

library(dplyr)   # pipes, group_by(), summarise(), count()
library(ISLR)    # attached by the original script; not referenced in this section
library(cluster) # daisy() and pam()
library(Rtsne)   # t-SNE embedding
library(ggplot2) # plotting

# Column subset for the co-purchase analysis: everything except column 1 and
# columns 4-14. Author's note: the binary co-purchase column is among the
# columns removed here.
cp_ozon_tv <- ozon_tv[, c(-1, -4:-14)]
cp_ozon_tv$Brand <- as.factor(cp_ozon_tv$Brand)
cp_ozon_tv$Product.Name <- as.factor(cp_ozon_tv$Product.Name)

# Gower dissimilarity; the third column is treated as log-ratio scaled
cp_ozon_dist <- daisy(cp_ozon_tv, metric = "gower", type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(cp_ozon_dist)

# Dissimilarities as a plain matrix
cp_df_mat <- as.matrix(cp_ozon_dist)

# Print one pair of rows whose dissimilarity is the largest value strictly
# below the overall maximum (a highly dissimilar pair, despite the original
# "most similar pair" label)
cp_ozon_tv[
  which(cp_df_mat == max(cp_df_mat[cp_df_mat != max(cp_df_mat)]),
        arr.ind = TRUE)[1, ],
]

# Choosing the number of clusters: average silhouette width of PAM for k = 2..15
# (position 1 stays NA because a single cluster has no silhouette)
sil_width <- rep(NA_real_, 15)
for (i in 2:15) {
  pam_fit <- pam(cp_ozon_dist, diss = TRUE, k = i)
  sil_width[i] <- pam_fit$silinfo$avg.width
}

# Plot silhouette width (higher is better)
plot(1:15, sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(1:15, sil_width)

# Cluster interpretation: the author settled on 11 clusters from the plot above.
# The fit must use the co-purchase dissimilarities; the original passed
# av_ozon_dist (the also-viewed object) here, so every co-purchase segment
# below was derived from the wrong data.
pam_fit <- pam(cp_ozon_dist, diss = TRUE, k = 11)

# Rows acting as cluster medoids
cp_ozon_tv[pam_fit$medoids, ]

# Plotting the results: 2-D t-SNE embedding coloured by PAM cluster.
# The name cp_tsne_obj is first the Rtsne result and then the plotting frame.
cp_tsne_obj <- Rtsne(cp_ozon_dist, is_distance = TRUE)

cp_tsne_obj <- cp_tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(aes(x = X, y = Y), data = cp_tsne_obj) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
cp_ozon_tv$segments <- cp_tsne_obj$cluster

# Per-segment averages of the co-purchase variables.
# NOTE(review): the output columns carry an "_av" suffix although they are
# computed from the *_cp variables; the names are kept unchanged so existing
# consumers are not broken.
seg_cp_ozon <- cp_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_price_av = mean(AP_cp),
    avg_rating_av = mean(AR_cp),
    avg_number_of_reviews_av = mean(ANR_cp)
  )

cp_ozon_tv$Brand <- as.character(cp_ozon_tv$Brand)
cp_ozon_tv$Product.Name <- as.character(cp_ozon_tv$Product.Name)

# Mean price and row count per segment
seg_cp_price <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(mean(AP_cp)) %>%
  filter(n == max(n))

# Most frequent brand(s) per segment
seg_cp_brand <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n))

# Final cluster table: most frequent product name(s) per segment together with
# the segment means
seg_cp_all <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(AP_cp), mean(AR_cp), mean(ANR_cp)) %>%
  filter(n == max(n))

cp_ozon_tv

# Number of rows in each segment
cp_ozon_tv %>%
  group_by(segments) %>%
  tally()

# First regression attempts ----

library(car)

# Control model: sales explained by the main product's own price, rating,
# discount and review count
LM_control <- lm(
  Sales_main ~ Price_main + Rating_main + Discount_main + Reviews_main,
  data = ozon_tv
)
summary(LM_control)

# Full models: each response regressed on every remaining column except the
# identifier, brand, product name and co-purchase flag
LM_sales <- lm(
  Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_sales)

LM_rating <- lm(
  Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_rating)

LM_reviews <- lm(
  Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_reviews)

# Log-log model of sales on discount.
# NOTE(review): log() of a zero value is -Inf - confirm that neither column
# contains zeros.
LogM_sales <- lm(
  log(Sales_main) ~ log(Discount_main),
  data = ozon_tv
)
summary(LogM_sales)

# Descriptive statistics ----

# Install pastecs only when it is missing: an unconditional install.packages()
# re-downloads the package on every run and fails without network access.
if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}

library(pastecs)

# Descriptive statistics for columns 4-13 of the data set
descriptive <- stat.desc(ozon_tv[, 4:13])

# Rounded view (printed only; `descriptive` itself keeps full precision)
round(descriptive, 2)

head(descriptive)

descriptive

str(descriptive)

# NOTE(review): table() on a data frame cross-tabulates all of its columns at
# once, which for ten numeric columns is unlikely to be the intended result
# and may fail outright - confirm whether this object is needed at all.
decriptive_table <- table(descriptive)

as.data.frame(descriptive)

descriptive

# Install ggpubr only when it is missing: an unconditional install.packages()
# re-downloads the package on every run and fails without network access.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}

library(ggpubr)

# Box plots of the main-product variables ----

# Ratings
ggboxplot(ozon_tv, y = "Rating_main", width = 0.5)

# Reviews
ggboxplot(ozon_tv, y = "Reviews_main", width = 0.5)

# Discount
ggboxplot(ozon_tv, y = "Discount_main", width = 0.5)

# Price
ggboxplot(ozon_tv, y = "Price_main", width = 0.5)

# Sales
ggboxplot(ozon_tv, y = "Sales_main", width = 0.5)

# Histograms (9 bins, mean marked) ----

gghistogram(ozon_tv, x = "Sales_main", bins = 9,
            add = "mean")

gghistogram(ozon_tv, x = "Rating_main", bins = 9,
            add = "mean")

gghistogram(ozon_tv, x = "Discount_main", bins = 9,
            add = "mean")

# Q-Q plots ----

ggqqplot(ozon_tv, x = "Reviews_main")

ggqqplot(ozon_tv, x = "Rating_main")

ggqqplot(ozon_tv, x = "Sales_main")

# Sales by brand ----

ozon_brands <- read.csv("Ozon_brands.csv", header = TRUE, sep = ",")

ggboxplot(ozon_brands, x = "Brand", y = "Sales_main",
          color = "Brand")

ggstripchart(ozon_brands, x = "Brand", y = "Sales_main",
             color = "Brand",
             add = "mean_sd")

# Bar chart of mean sales per brand.
# The original call was leftover example code: it plotted `df` (never assigned
# in this script) with columns "Brands", "Sales" and "Eye" that are not used
# anywhere else, so it could not run. It now uses the brand data loaded above;
# the fixed four-colour palette was dropped because the number of brands is
# not known here.
ggbarplot(ozon_brands, x = "Brand", y = "Sales_main",
          color = "Brand",
          add = "mean_se")

# Fitting the model ----
# Regressions re-estimated on data files with outliers removed

# Without outliers, first attempt
ozon_outliers <- read.csv("Ozon_outliers.csv", header = TRUE, sep = ",")

# Same specification as the full models: every column except the identifier,
# brand, product name and co-purchase flag is a predictor
LM_sales_outliers <- lm(
  Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_sales_outliers)

LM_rating_outliers <- lm(
  Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_rating_outliers)

LM_reviews_outliers <- lm(
  Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_reviews_outliers)

# Without outliers, second attempt
ozon_outliers2 <- read.csv("Ozon_outliers2.csv", header = TRUE, sep = ",")

LM_sales_outliers2 <- lm(
  Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_sales_outliers2)

LM_rating_outliers2 <- lm(
  Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_rating_outliers2)

LM_reviews_outliers2 <- lm(
  Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_reviews_outliers2)

Размещено на Allbest.ru

...

Подобные документы

  • Origins of and reasons for product placement: history of product placement in the cinema, sponsored shows. Factors that can influence the cost of a placement. Branded entertainment in all its forms: series and television programs, novels and plays.

    курсовая работа [42,1 K], добавлен 16.10.2013

  • Improving the business processes of customer relationship management through automation. Solutions the problem of the absence of automation of customer related business processes. Develop templates to support ongoing processes of customer relationships.

    реферат [173,6 K], добавлен 14.02.2016

  • Оргтехника как основа для работы офиса, ее типы и функциональные особенности, значение. Необходимость использования компьютера, ее обоснование. Информационные системы в управлении и принципы их формирования. Модели продаж CRM-систем On-demand (или SaaS).

    курсовая работа [1,6 M], добавлен 01.04.2012

  • Evaluation of urban public transport system in Indonesia, the possibility of its effective development. Analysis of influence factors by using the Ishikawa Cause and Effect diagram and also the use of Pareto analysis. Using business process reengineering.

    контрольная работа [398,2 K], добавлен 21.04.2014

  • The impact of management and leadership styles on strategic decisions. Creating a leadership strategy that supports organizational direction. Appropriate methods to review current leadership requirements. Plan for the development of future situations.

    курсовая работа [36,2 K], добавлен 20.05.2015

  • Selected aspects of stimulation of scientific thinking. Meta-skills. Methods of critical and creative thinking. Analysis of the decision-making methods without use of numerical values of probability (exemplificative of the investment projects).

    аттестационная работа [196,7 K], добавлен 15.10.2008

  • Рассмотрение концепции Customer Relationship Management по управлению взаимоотношениями с клиентами. Возможности CRM-систем, их влияние на эффективность бизнеса. Разработка, реализация и стоимость проекта внедрения CRM-системы для ЗАО "Сибтехнология".

    дипломная работа [5,5 M], добавлен 15.09.2012

  • Critical literature review. Apparel industry overview: Porter’s Five Forces framework, PESTLE, competitors analysis, key success factors of the industry. Bershka’s business model. Integration-responsiveness framework. Critical evaluation of chosen issue.

    контрольная работа [29,1 K], добавлен 04.10.2014

  • Impact of globalization on the way organizations conduct their businesses overseas, in the light of increased outsourcing. The strategies adopted by General Electric. Offshore Outsourcing Business Models. Factors for affect the success of the outsourcing.

    реферат [32,3 K], добавлен 13.10.2011

  • Major factors of success of managers. Effective achievement of the organizational purposes. Use of "emotional investigation". Providing support to employees. That is appeal charisma. Positive morale and recognition. Feedback of the head with workers.

    презентация [1,8 M], добавлен 15.07.2012

  • Сущность CRM-систем - Customer Relationship Management. Преимущества клиентоориентированного подхода к бизнесу. Формы функционирования и классификация CRM-систем. Основные инструменты, которые включает в себя технология управления отношениями с клиентами.

    реферат [30,9 K], добавлен 12.01.2011

  • Six principles of business etiquette survival or success in the business world. Punctuality, privacy, courtesy, friendliness and affability, attention to people, appearance, literacy speaking and writing as the major commandments of business man.

    презентация [287,1 K], добавлен 21.10.2013

  • Discussion of organizational culture. The major theories of personality. Social perception, its elements and common barriers. Individual and organizational influences on ethical behavior. The psychophysiology of the stress response.

    контрольная работа [27,7 K], добавлен 19.11.2012

  • Relevance of electronic document flow implementation. Description of selected companies. Pattern of ownership. Sectorial branch. Company size. Resources used. Current document flow. Major advantage of the information system implementation in the work.

    курсовая работа [128,1 K], добавлен 14.02.2016

  • Понятие и сущность мотивации трудовой деятельности персонала. Особенности применения методов стимулирования в коммерческих организациях на примере Levi’s Russia. Методы нематериального стимулирования персонала. Вклад сотрудника в прибыль компании.

    курсовая работа [27,8 K], добавлен 15.05.2014

  • Searching for investor and interaction with him. Various problems in the project organization and their solutions: design, page-proof, programming, the choice of the performers. Features of the project and the results of its creation, monetization.

    реферат [22,0 K], добавлен 14.02.2016

  • Analysis of the peculiarities of the mobile applications market. The specifics of the process of mobile application development. Systematization of the main project management methodologies. Decision of the problems of use of the classical methodologies.

    контрольная работа [1,4 M], добавлен 14.02.2016

  • Description of the structure of the airline and the structure of its subsystems. Analysis of the main activities of the airline, other goals. Building the “objective tree” of the airline. Description of the environmental features of the transport company.

    курсовая работа [1,2 M], добавлен 03.03.2013

  • The concept and features of bankruptcy. Methods prevent bankruptcy of Russian small businesses. General characteristics of crisis management. Calculating the probability of bankruptcy discriminant function in the example of "Kirov Plant "Mayak".

    курсовая работа [74,5 K], добавлен 18.05.2015

  • Value and probability weighting function. Tournament games as special settings for a competition between individuals. Model: competitive environment, application of prospect theory. Experiment: design, conducting. Analysis of experiment results.

    курсовая работа [1,9 M], добавлен 20.03.2016

Работы в архивах красиво оформлены согласно требованиям ВУЗов и содержат рисунки, диаграммы, формулы и т.д.
PPT, PPTX и PDF-файлы представлены только в архивах.
Рекомендуем скачать работу.