---
title: "Ejercicios Clase 8"
author: ""
header-includes:
  - \usepackage{bbm}
date: "06/11/2023"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r message=FALSE, warning=FALSE, include=FALSE}
# Libraries
library(quanteda)
library(knitr)
library(quanteda.textplots)
library(quanteda.textstats)

# Load the tab-separated base; the chunks below read its `text` column.
# NOTE(review): absolute, machine-specific path -- consider a project-relative
# path so the document knits on other machines.
base_final_grande <- read.delim(
  "C:/Users/Usuario/Documents/IntroRTextoLIM/Clase8/Material/base_genero_final.txt",
  sep = "\t",
  header = TRUE,
  encoding = "latin1"
)
```

```{r tokens, message=FALSE, warning=FALSE, include=FALSE}
# Tokenise the text, dropping punctuation and numbers at the tokens stage.
toks_general <- tokens(
  base_final_grande$text,
  remove_punct = TRUE,
  remove_numbers = TRUE
)

# Build the document-feature matrix in explicit steps. Stopwords are removed
# with dfm_remove() rather than through dfm()'s deprecated `remove` argument,
# which recent quanteda releases no longer honour (and this chunk hides
# warnings, so the omission would go unnoticed).
dfm <- dfm(toks_general, tolower = TRUE)
dfm <- dfm_remove(dfm, stopwords("spanish"))
dfm <- dfm_remove(dfm, min_nchar = 3)
dfm <- dfm_trim(dfm, min_termfreq = 50, min_docfreq = 2)
```

# Introducción

El presente **documento** contiene algunos resultados del procesamiento de menciones parlamentarias realizadas en el marco del curso de *Introducción a la programación y análisis de texto con R*.

```{r echo=FALSE, message=FALSE, warning=FALSE}
# Table with the 10 most frequent features of the general dfm.
palabras <- topfeatures(dfm, 10)
knitr::kable(palabras, caption = "Palabras más mencionadas", col.names = "N")
```

```{r message=FALSE, warning=FALSE, include=FALSE}
# Keyword-in-context search for the two phrases. kwic() is given a tokens
# object because calling it directly on a character vector is deprecated.
toks_lower <- tokens(tolower(base_final_grande$text))
kw_ddhhsspp <- kwic(
  toks_lower,
  pattern = phrase(c("derechos humanos", "salud pública"))
)

# dfm built from the context preceding each match (`pre`), with the same
# cleaning steps as above, then grouped by the matched keyword.
toks_pre <- tokens(
  kw_ddhhsspp$pre,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
dfm_2 <- dfm(toks_pre, tolower = TRUE)
dfm_2 <- dfm_remove(dfm_2, stopwords("spanish"))
dfm_2 <- dfm_remove(dfm_2, min_nchar = 3)
dfm_2 <- dfm_group(dfm_2, groups = kw_ddhhsspp$keyword)
```

\newpage

# Nubes de palabras

## General

A continuación hago una nube de palabras general y por grupos.

```{r echo=FALSE, fig.cap="Nube", message=FALSE, warning=FALSE, out.width="80%"}
# Word cloud over all of dfm_2 (no group comparison).
# `min_count` is the actual argument name; the former `min.count` was not
# matched and the default threshold was used instead. The wordcloud-style
# aliases (random.order, rot.per, colors) are replaced by their current names.
textplot_wordcloud(
  dfm_2,
  min_count = 4,
  max_words = 100,
  random_order = TRUE,
  rotation = 0.5,
  color = RColorBrewer::brewer.pal(8, "Dark2")
)
```

## Grupos

```{r echo=FALSE, message=FALSE, warning=FALSE,out.width = "100%",fig.cap="Nube de palabras por grupos"}
# Comparison cloud: one section per group (document) of dfm_2.
textplot_wordcloud(
  dfm_2,
  min_count = 1,
  max_words = 100,
  random_order = TRUE,
  rotation = 0.5,
  color = RColorBrewer::brewer.pal(8, "Dark2"),
  comparison = TRUE
)
```