Methods
Method new()
Initializes the KeywordProcessor object.
Usage
KeywordProcessor$new(
keys = NULL,
words = NULL,
trie = NULL,
id = "_word_",
chars = paste0(c(letters, LETTERS, 0:9, "_"), collapse = ""),
ignore_case = FALSE
)
Arguments
`keys`: character vector. Strings to identify (find/replace) in the text. Must be provided if `trie` is `NULL`.
`words`: character vector. Strings to be returned (find) or used as replacements (replace) when the respective `keys` are found. Should have the same length as `keys`. If not provided, `words = keys`.
`trie`: character. JSON built character by character and needed for the search. It can be provided instead of `keys` and `words`.
`id`: character. Used to name the end nodes of the `trie` dictionary.
`chars`: character. Used to validate if a word continues. Default `paste0(c(letters, LETTERS, 0:9, "_"), collapse = "")`, equivalent to `[a-zA-Z0-9_]`.
`ignore_case`: logical. If `FALSE` the search is case sensitive. Default `FALSE`.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs
library(rflashtext)
processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs
Method show_trie()
Shows the trie dictionary used to find/replace keys.
Returns
character. JSON string of the trie structure. It can be converted to list using jsonlite::fromJSON.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()
Method add_keys_words()
Adds keys and words to the trie dictionary.
Arguments
`keys`: character vector. Strings to identify (find/replace) in the text.
`words`: character vector. Strings to be returned (find) or used as replacements (replace) when the respective `keys` are found. Should have the same length as `keys`. If not provided, `words = keys`.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()
Method contain_keys()
Checks if keys are in the trie dictionary.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
Method get_words()
Gets the words for the keys found in the trie dictionary.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
Method find_keys()
Finds keys in the sentences using the search trie dictionary.
Arguments
`sentences`: character vector. Text in which to find the previously defined `keys`.
`span_info`: logical. `TRUE` to retrieve the `words` and the position of the matches. `FALSE` to only retrieve the `words`. Default `TRUE`.
Returns
list with the words corresponding to keys found in the sentence. Hint: Use data.table::rbindlist(...) to transform the list to a data frame.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found
Method replace_keys()
Replaces keys found in the sentences by the corresponding words.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = "NY")
#> [1] TRUE
processor$get_words(keys = "LA")
#> [1] "Los Angeles"
processor$find_keys(sentences = "I live in LA but I like NY")
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles" "New York"
#>
#> [[1]]$start
#> [1] 11 25
#>
#> [[1]]$end
#> [1] 12 26
#>
#>
processor$replace_keys(sentences = "I live in LA but I like NY")
#> [1] "I live in Los Angeles but I like New York"
## ------------------------------------------------
## Method `KeywordProcessor$new`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs
#> $trie
#> <pointer: 0x55d2ed3cd210>
#>
#> $id
#> [1] "_word_"
#>
#> $chars
#> [1] "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
#>
#> $ignore_case
#> [1] FALSE
#>
library(rflashtext)
processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs
#> $trie
#> <pointer: 0x55d2e9ed6b70>
#>
#> $id
#> [1] "_word_"
#>
#> $chars
#> [1] "abcdefghijklmnopqrstuvwxyz"
#>
#> $ignore_case
#> [1] FALSE
#>
## ------------------------------------------------
## Method `KeywordProcessor$show_trie`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()
#> [1] "{\"L\":{\"A\":{\"_word_\":\"Los Angeles\"}},\"N\":{\"Y\":{\"_word_\":\"New York\"}}}"
## ------------------------------------------------
## Method `KeywordProcessor$add_keys_words`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()
#> [1] "{\"C\":{\"A\":{\"_word_\":\"California\"}},\"L\":{\"A\":{\"_word_\":\"Los Angeles\"}},\"N\":{\"Y\":{\"_word_\":\"New York\"}}}"
## ------------------------------------------------
## Method `KeywordProcessor$contain_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
#> [1] TRUE TRUE FALSE
## ------------------------------------------------
## Method `KeywordProcessor$get_words`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
#> [1] "New York" "Los Angeles" NA
## ------------------------------------------------
## Method `KeywordProcessor$find_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles" "New York"
#>
#> [[1]]$start
#> [1] 11 25
#>
#> [[1]]$end
#> [1] 12 26
#>
#>
## ------------------------------------------------
## Method `KeywordProcessor$replace_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences
#> [1] "I live in Los Angeles but I like New York"