Methods
Method new()
Usage
keyword_processor$new(
  ignore_case = TRUE,
  word_chars = c(letters, LETTERS, 0:9, "_"),
  dict = NULL
)
Arguments
ignore_case: logical. If FALSE, the search is case sensitive. Default TRUE.
word_chars: character vector. Used to validate if a word continues. Default c(letters, LETTERS, 0:9, "_"), equivalent to [a-zA-Z0-9_].
dict: list. Internally built character by character and needed for the search. It is recommended to leave the default value NULL.
Examples
library(rflashtext)
processor <- keyword_processor$new(ignore_case = FALSE, word_chars = letters)
processor
Method show_attrs()
Arguments
attrs: character vector. Options are subsets of c("all", "id", "word_chars", "dict", "ignore_case", "dict_size"). Default "all".
Returns
list with the values of the attrs. Useful to save dict and reuse it or to check the dict_size.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = "dict_size")
processor$show_attrs(attrs = "dict")
Method add_keys_words()
Arguments
keys: character vector. Strings to identify (find/replace) in the text.
words: character vector. Strings to be returned (find) or used as replacements (replace) when the respective keys are found. Should have the same length as keys. If not provided, words = keys.
Returns
Invisible logical vector; assign it to a variable to inspect the output. An element is FALSE if the corresponding key is duplicated, in which case the respective word is updated.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
correct <- processor$add_keys_words(keys = c("NY", "CA"), words = c("New York City", "California"))
# To check if there are duplicate keys
correct
Method contain_keys()
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
Method get_words()
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
Method find_keys()
Arguments
sentence: character. Text in which to find the keys previously defined. Not vectorized.
span_info: logical. TRUE to retrieve the words and the position of the matches. FALSE to only retrieve the words. Default TRUE.
Returns
list with the words corresponding to keys found in the sentence. Hint: Use do.call(rbind, ...) to transform the list to a matrix.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentence = "I live in LA but I like NY")
do.call(rbind, words_found)
Method replace_keys()
Arguments
sentence: character. Text in which to replace the keys found with the corresponding words. Not vectorized.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentence <- processor$replace_keys(sentence = "I live in LA but I like NY")
new_sentence
Examples
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = "NY")
#> [1] TRUE
processor$get_words(keys = "LA")
#> [1] "Los Angeles"
processor$find_keys(sentence = "I live in LA but I like NY")
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles"
#> 
#> [[1]]$start
#> [1] 11
#> 
#> [[1]]$end
#> [1] 13
#> 
#> 
#> [[2]]
#> [[2]]$word
#> [1] "New York"
#> 
#> [[2]]$start
#> [1] 25
#> 
#> [[2]]$end
#> [1] 26
#> 
#> 
processor$replace_keys(sentence = "I live in LA but I like NY")
#> [1] "I live in Los Angeles but I like New York"
## ------------------------------------------------
## Method `keyword_processor$new`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new(ignore_case = FALSE, word_chars = letters)
#> Use `KeywordProcessor` instead for better performance
processor
#> <keyword_processor>
#>   Public:
#>     add_keys_words: function (keys, words = NULL) 
#>     contain_keys: function (keys) 
#>     find_keys: function (sentence, span_info = TRUE) 
#>     get_words: function (keys) 
#>     initialize: function (ignore_case = TRUE, word_chars = c(letters, LETTERS, 
#>     replace_keys: function (sentence) 
#>     show_attrs: function (attrs = "all") 
#>   Private:
#>     add_key_word: function (key, word) 
#>     attrs: list
#>     contain_key: function (key) 
#>     find_key: function (sentence, span_info) 
#>     get_word: function (key) 
#>     replace_key: function (sentence) 
#>     set_attr: function (id, ignore_case, word_chars, dict) 
## ------------------------------------------------
## Method `keyword_processor$show_attrs`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = "dict_size")
#> [1] 2
processor$show_attrs(attrs = "dict")
#> $`_class_`
#> [1] "keyword_dictionary"
#> 
#> $n
#> $n$y
#> $n$y$`_word_`
#> [1] "New York"
#> 
#> 
#> 
#> $l
#> $l$a
#> $l$a$`_word_`
#> [1] "Los Angeles"
#> 
#> 
#> 
## ------------------------------------------------
## Method `keyword_processor$add_keys_words`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
correct <- processor$add_keys_words(keys = c("NY", "CA"), words = c("New York City", "California"))
#> Warning: There are duplicate keys. To a better check assign the output to a variable.
# To check if there are duplicate keys
correct
#> [1] FALSE  TRUE
## ------------------------------------------------
## Method `keyword_processor$contain_keys`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
#> [1]  TRUE  TRUE FALSE
## ------------------------------------------------
## Method `keyword_processor$get_words`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
#> [1] "New York"    "Los Angeles" NA           
## ------------------------------------------------
## Method `keyword_processor$find_keys`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentence = "I live in LA but I like NY")
do.call(rbind, words_found)
#>      word          start end
#> [1,] "Los Angeles" 11    13 
#> [2,] "New York"    25    26 
## ------------------------------------------------
## Method `keyword_processor$replace_keys`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentence <- processor$replace_keys(sentence = "I live in LA but I like NY")
new_sentence
#> [1] "I live in Los Angeles but I like New York"