Methods
Method new()
Initializes the KeywordProcessor
object.
Usage
KeywordProcessor$new(
keys = NULL,
words = NULL,
trie = NULL,
id = "_word_",
chars = paste0(c(letters, LETTERS, 0:9, "_"), collapse = ""),
ignore_case = FALSE
)
Arguments
keys
character vector. Strings to identify (find/replace) in the text. Must be provided if
trie
is NULL.
words
character vector. Strings to be returned (find) or used as replacements (replace) when the respective
keys
are found. Should have the same length as keys. If not provided, words = keys.
trie
character. JSON built character by character and needed for the search. It can be provided instead of
keys
and words.
id
character. Used to name the end nodes of the
trie
dictionary.
chars
character. Used to validate if a word continues. Default
paste0(c(letters, LETTERS, 0:9, "_"), collapse = "")
(equivalent to [a-zA-Z0-9_]).
ignore_case
logical. If
FALSE
the search is case sensitive. Default FALSE.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs
library(rflashtext)
processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs
Method show_trie()
Shows the trie
dictionary used to find/replace keys
.
Returns
character. JSON string of the trie
structure. It can be converted to list using jsonlite::fromJSON
.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()
Method add_keys_words()
Adds keys
and words
to the trie
dictionary.
Arguments
keys
character vector. Strings to identify (find/replace) in the text.
words
character vector. Strings to be returned (find) or used as replacements (replace) when the respective
keys
are found. Should have the same length as keys. If not provided, words = keys.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()
Method contain_keys()
Checks if keys
are in the trie
dictionary.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
Method get_words()
Gets the words
for the keys
found in the trie
dictionary.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
Method find_keys()
Finds keys
in the sentences using the search trie
dictionary.
Arguments
sentences
character vector. Text in which to find the
keys
previously defined.
span_info
logical. TRUE to retrieve the words and the position of the matches; FALSE to only retrieve the words. Default TRUE.
Returns
list with the words
corresponding to the keys
found in the sentences. Hint: Use data.table::rbindlist(...)
to transform the list to a data frame.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found
Method replace_keys()
Replaces keys
found in the sentences by the corresponding words
.
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences
Examples
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = "NY")
#> [1] TRUE
processor$get_words(keys = "LA")
#> [1] "Los Angeles"
processor$find_keys(sentences = "I live in LA but I like NY")
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles" "New York"
#>
#> [[1]]$start
#> [1] 11 25
#>
#> [[1]]$end
#> [1] 12 26
#>
#>
processor$replace_keys(sentences = "I live in LA but I like NY")
#> [1] "I live in Los Angeles but I like New York"
## ------------------------------------------------
## Method `KeywordProcessor$new`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs
#> $trie
#> <pointer: 0x55d2ed3cd210>
#>
#> $id
#> [1] "_word_"
#>
#> $chars
#> [1] "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
#>
#> $ignore_case
#> [1] FALSE
#>
library(rflashtext)
processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs
#> $trie
#> <pointer: 0x55d2e9ed6b70>
#>
#> $id
#> [1] "_word_"
#>
#> $chars
#> [1] "abcdefghijklmnopqrstuvwxyz"
#>
#> $ignore_case
#> [1] FALSE
#>
## ------------------------------------------------
## Method `KeywordProcessor$show_trie`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()
#> [1] "{\"L\":{\"A\":{\"_word_\":\"Los Angeles\"}},\"N\":{\"Y\":{\"_word_\":\"New York\"}}}"
## ------------------------------------------------
## Method `KeywordProcessor$add_keys_words`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()
#> [1] "{\"C\":{\"A\":{\"_word_\":\"California\"}},\"L\":{\"A\":{\"_word_\":\"Los Angeles\"}},\"N\":{\"Y\":{\"_word_\":\"New York\"}}}"
## ------------------------------------------------
## Method `KeywordProcessor$contain_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
#> [1] TRUE TRUE FALSE
## ------------------------------------------------
## Method `KeywordProcessor$get_words`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
#> [1] "New York" "Los Angeles" NA
## ------------------------------------------------
## Method `KeywordProcessor$find_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles" "New York"
#>
#> [[1]]$start
#> [1] 11 25
#>
#> [[1]]$end
#> [1] 12 26
#>
#>
## ------------------------------------------------
## Method `KeywordProcessor$replace_keys`
## ------------------------------------------------
library(rflashtext)
processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences
#> [1] "I live in Los Angeles but I like New York"