1 min read

Tidy Vargas Llosa

## Loading required package: RColorBrewer

Análisis de sentimiento

# Net sentiment per chunk of each title: Bing-lexicon positive word count
# minus negative word count, where a chunk is `line %/% 80`
# (presumably 80 consecutive lines of text -- confirm against how
# `mvll_tidy$line` is built).
mvll_sentiment <- mvll_tidy %>%
  inner_join(get_sentiments("bing")) %>%
  count(title, index = line %/% 80, sentiment) %>%
  # fill = 0: a chunk with no words of one polarity would otherwise get NA
  # in that column, making the difference below NA and dropping the bar.
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)


# One panel per title, one bar per chunk showing its net sentiment.
ggplot(mvll_sentiment, aes(index, sentiment, fill = title)) +
  geom_col() +
  facet_wrap(~title, scales = "free_x") +
  # The fill colour only repeats the facet label, so hide its legend.
  # "none" is the supported spelling; passing FALSE is deprecated in ggplot2.
  guides(fill = "none")

Wordcloud con Los cuadernos de Don Rigoberto

# Word cloud of at most 40 words from "Notebooks of Don Rigoberto",
# after dropping stop words. `s` captures whatever wordcloud() returns;
# the plot itself is drawn as a side effect.
s <- mvll_tidy %>%
  filter(
    title == "Notebooks of Don Rigoberto",
    !str_detect(word, "\u2019") # remove didn't, they're, etc.
  ) %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE) %>%
  with(wordcloud(word, n, max.words = 40))

Palabras positivas y negativas más comunes

# Frequency of each word that appears in the Bing lexicon, together with
# its polarity, sorted from most to least frequent.
word_counts <- mvll_tidy %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

# Horizontal bar chart of the ten most frequent words of each polarity
# (ties at the cut-off are kept, so a panel may show more than ten).
word_counts %>%
  group_by(sentiment) %>%
  # Rank explicitly by the count column instead of relying on it being last.
  top_n(10, n) %>%
  ungroup() %>%
  # Order the bars by frequency rather than alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  # The argument is `show.legend`; the misspelled `show_legend` was ignored
  # with a warning, so the redundant legend was still drawn.
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  coord_flip()

Palabras características de cada libro

# Per-title word counts with tf, idf and tf-idf columns added, treating
# each title as one document.
book_words <- mvll_tidy %>%
  count(title, word, sort = TRUE) %>%
  ungroup() %>%
  bind_tf_idf(term = word, document = title, n = n)

# Sort by descending tf-idf, then turn `word` into a factor whose levels
# follow the reverse of that order, so plots order words by tf-idf
# instead of alphabetically.
plt <- mutate(
  arrange(book_words, desc(tf_idf)),
  word = factor(word, levels = rev(unique(word)))
)



# Highest tf-idf words for the titles in positions 10 to 13 of
# `libros$title`, one panel per title.
plt %>%
  filter(title %in% libros$title[10:13]) %>%
  group_by(title) %>%
  # Rank explicitly by tf-idf instead of relying on it being the last column.
  top_n(10, tf_idf) %>%
  ungroup() %>%
  ggplot(aes(word, tf_idf, fill = title)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~title, ncol = 2, scales = "free") +
  coord_flip()