Methods
Method new()
Usage
keyword_processor$new(
ignore_case = TRUE,
word_chars = c(letters, LETTERS, 0:9, "_"),
dict = NULL
)
Arguments
ignore_case
logical. If FALSE, the search is case sensitive. Default: TRUE.
word_chars
character vector. Used to validate if a word continues. Default: c(letters, LETTERS, 0:9, "_"), equivalent to [a-zA-Z0-9_].
dict
list. Internally built character by character and needed for the search. It is recommended to keep the default value, NULL.
Examples
library(rflashtext)
processor <- keyword_processor$new(ignore_case = FALSE, word_chars = letters)
processor
Method show_attrs()
Arguments
attrs
character vector. Options are subsets of c("all", "id", "word_chars", "dict", "ignore_case", "dict_size"). Default: "all".
Returns
list with the values of the attrs. Useful to save the dict and reuse it, or to check the dict_size.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = "dict_size")
processor$show_attrs(attrs = "dict")
Method add_keys_words()
Arguments
keys
character vector. Strings to identify (find/replace) in the text.
words
character vector. Strings to be returned (find) or substituted (replace) when the respective keys are found. Should have the same length as keys. If not provided, words = keys.
Returns
invisible. Assign to a variable to inspect the output. Logical vector: an element is FALSE if the corresponding key is duplicated, in which case the respective word will be updated.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
correct <- processor$add_keys_words(keys = c("NY", "CA"), words = c("New York City", "California"))
# To check if there are duplicate keys
correct
Method contain_keys()
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
Method get_words()
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
Method find_keys()
Arguments
sentence
character. Text in which to find the previously defined keys. Not vectorized.
span_info
logical. TRUE to retrieve the words and the position of the matches. FALSE to only retrieve the words. Default: TRUE.
Returns
list with the words corresponding to the keys found in the sentence. Hint: Use do.call(rbind, ...) to transform the list to a matrix.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentence = "I live in LA but I like NY")
do.call(rbind, words_found)
Method replace_keys()
Arguments
sentence
character. Text in which the keys found are replaced by the corresponding words. Not vectorized.
Examples
library(rflashtext)
processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentence <- processor$replace_keys(sentence = "I live in LA but I like NY")
new_sentence
Examples
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = "NY")
#> [1] TRUE
processor$get_words(keys = "LA")
#> [1] "Los Angeles"
processor$find_keys(sentence = "I live in LA but I like NY")
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles"
#>
#> [[1]]$start
#> [1] 11
#>
#> [[1]]$end
#> [1] 13
#>
#>
#> [[2]]
#> [[2]]$word
#> [1] "New York"
#>
#> [[2]]$start
#> [1] 25
#>
#> [[2]]$end
#> [1] 26
#>
#>
processor$replace_keys(sentence = "I live in LA but I like NY")
#> [1] "I live in Los Angeles but I like New York"
## ------------------------------------------------
## Method `keyword_processor$new`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new(ignore_case = FALSE, word_chars = letters)
#> Use `KeywordProcessor` instead for better performance
processor
#> <keyword_processor>
#> Public:
#> add_keys_words: function (keys, words = NULL)
#> contain_keys: function (keys)
#> find_keys: function (sentence, span_info = TRUE)
#> get_words: function (keys)
#> initialize: function (ignore_case = TRUE, word_chars = c(letters, LETTERS,
#> replace_keys: function (sentence)
#> show_attrs: function (attrs = "all")
#> Private:
#> add_key_word: function (key, word)
#> attrs: list
#> contain_key: function (key)
#> find_key: function (sentence, span_info)
#> get_word: function (key)
#> replace_key: function (sentence)
#> set_attr: function (id, ignore_case, word_chars, dict)
## ------------------------------------------------
## Method `keyword_processor$show_attrs`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = "dict_size")
#> [1] 2
processor$show_attrs(attrs = "dict")
#> $`_class_`
#> [1] "keyword_dictionary"
#>
#> $n
#> $n$y
#> $n$y$`_word_`
#> [1] "New York"
#>
#>
#>
#> $l
#> $l$a
#> $l$a$`_word_`
#> [1] "Los Angeles"
#>
#>
#>
## ------------------------------------------------
## Method `keyword_processor$add_keys_words`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
correct <- processor$add_keys_words(keys = c("NY", "CA"), words = c("New York City", "California"))
#> Warning: There are duplicate keys. To a better check assign the output to a variable.
# To check if there are duplicate keys
correct
#> [1] FALSE TRUE
## ------------------------------------------------
## Method `keyword_processor$contain_keys`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
#> [1] TRUE TRUE FALSE
## ------------------------------------------------
## Method `keyword_processor$get_words`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
#> [1] "New York" "Los Angeles" NA
## ------------------------------------------------
## Method `keyword_processor$find_keys`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentence = "I live in LA but I like NY")
do.call(rbind, words_found)
#> word start end
#> [1,] "Los Angeles" 11 13
#> [2,] "New York" 25 26
## ------------------------------------------------
## Method `keyword_processor$replace_keys`
## ------------------------------------------------
library(rflashtext)
processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentence <- processor$replace_keys(sentence = "I live in LA but I like NY")
new_sentence
#> [1] "I live in Los Angeles but I like New York"