Skip to contents

Based on the Python library flashtext. To see more details about the algorithm visit: FlashText

Public fields

attrs

list. Stores the attributes of the KeywordProcessor object.

Methods


Method new()

Initializes the KeywordProcessor object.

Usage

KeywordProcessor$new(
  keys = NULL,
  words = NULL,
  trie = NULL,
  id = "_word_",
  chars = paste0(c(letters, LETTERS, 0:9, "_"), collapse = ""),
  ignore_case = FALSE
)

Arguments

keys

character vector. Strings to identify (find/replace) in the text. Must be provided if trie is NULL.

words

character vector. Strings to be returned (find) or used as replacements (replace) when the respective keys are found. Should have the same length as keys. If not provided, words = keys.

trie

character. JSON built character by character and needed for the search. It can be provided instead of keys and words.

id

character. Used to name the end nodes of the trie dictionary.

chars

character. Used to validate if a word continues. Default paste0(c(letters, LETTERS, 0:9, "_"), collapse = "") equivalent to [a-zA-Z0-9_].

ignore_case

logical. If FALSE the search is case sensitive. Default FALSE.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs

library(rflashtext)

processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs


Method show_trie()

Shows the trie dictionary used to find/replace keys.

Usage

KeywordProcessor$show_trie()

Returns

character. JSON string of the trie structure. It can be converted to list using jsonlite::fromJSON.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()


Method add_keys_words()

Adds keys and words to the trie dictionary.

Usage

KeywordProcessor$add_keys_words(keys, words = NULL)

Arguments

keys

character vector. Strings to identify (find/replace) in the text.

words

character vector. Strings to be returned (find) or used as replacements (replace) when the respective keys are found. Should have the same length as keys. If not provided, words = keys.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()


Method contain_keys()

Checks if keys are in the trie dictionary.

Usage

KeywordProcessor$contain_keys(keys)

Arguments

keys

character vector. Strings to check for presence in the search trie dictionary.

Returns

logical vector. TRUE if the keys are in the search trie dictionary.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))


Method get_words()

Gets the words for the keys found in the trie dictionary.

Usage

KeywordProcessor$get_words(keys)

Arguments

keys

character vector. Strings to get back the respective words.

Returns

character vector. The respective words. If a key is not found, NA_character_ is returned for it.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))


Method find_keys()

Finds keys in the sentences using the search trie dictionary.

Usage

KeywordProcessor$find_keys(sentences, span_info = TRUE)

Arguments

sentences

character vector. Text to find the keys previously defined.

span_info

logical. TRUE to retrieve the words and the position of the matches. FALSE to only retrieve the words. Default TRUE.

Returns

list with the words corresponding to keys found in the sentence. Hint: Use data.table::rbindlist(...) to transform the list to a data frame.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found


Method replace_keys()

Replaces keys found in the sentences by the corresponding words.

Usage

KeywordProcessor$replace_keys(sentences)

Arguments

sentences

character vector. Text to replace the keys found by the corresponding words.

Returns

character vector. Text with the keys replaced by the respective words.

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences

Examples

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))

processor$contain_keys(keys = "NY")
#> [1] TRUE
processor$get_words(keys = "LA")
#> [1] "Los Angeles"

processor$find_keys(sentences = "I live in LA but I like NY")
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles" "New York"   
#> 
#> [[1]]$start
#> [1] 11 25
#> 
#> [[1]]$end
#> [1] 12 26
#> 
#> 
processor$replace_keys(sentences = "I live in LA but I like NY")
#> [1] "I live in Los Angeles but I like New York"

## ------------------------------------------------
## Method `KeywordProcessor$new`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$attrs
#> $trie
#> <pointer: 0x55d2ed3cd210>
#> 
#> $id
#> [1] "_word_"
#> 
#> $chars
#> [1] "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
#> 
#> $ignore_case
#> [1] FALSE
#> 
library(rflashtext)

processor <- KeywordProcessor$new(chars = paste0(letters, collapse = ""), keys = c("NY", "LA"))
processor$attrs
#> $trie
#> <pointer: 0x55d2e9ed6b70>
#> 
#> $id
#> [1] "_word_"
#> 
#> $chars
#> [1] "abcdefghijklmnopqrstuvwxyz"
#> 
#> $ignore_case
#> [1] FALSE
#> 

## ------------------------------------------------
## Method `KeywordProcessor$show_trie`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_trie()
#> [1] "{\"L\":{\"A\":{\"_word_\":\"Los Angeles\"}},\"N\":{\"Y\":{\"_word_\":\"New York\"}}}"

## ------------------------------------------------
## Method `KeywordProcessor$add_keys_words`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$add_keys_words(keys = "CA", words = "California")
processor$show_trie()
#> [1] "{\"C\":{\"A\":{\"_word_\":\"California\"}},\"L\":{\"A\":{\"_word_\":\"Los Angeles\"}},\"N\":{\"Y\":{\"_word_\":\"New York\"}}}"

## ------------------------------------------------
## Method `KeywordProcessor$contain_keys`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
#> [1]  TRUE  TRUE FALSE

## ------------------------------------------------
## Method `KeywordProcessor$get_words`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
#> [1] "New York"    "Los Angeles" NA           

## ------------------------------------------------
## Method `KeywordProcessor$find_keys`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentences = "I live in LA but I like NY")
words_found
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles" "New York"   
#> 
#> [[1]]$start
#> [1] 11 25
#> 
#> [[1]]$end
#> [1] 12 26
#> 
#> 

## ------------------------------------------------
## Method `KeywordProcessor$replace_keys`
## ------------------------------------------------

library(rflashtext)

processor <- KeywordProcessor$new(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentences <- processor$replace_keys(sentences = "I live in LA but I like NY")
new_sentences
#> [1] "I live in Los Angeles but I like New York"