Skip to contents

Based on the Python library flashtext. For more details about the algorithm, see: FlashText

Methods


Method new()

Usage

keyword_processor$new(
  ignore_case = TRUE,
  word_chars = c(letters, LETTERS, 0:9, "_"),
  dict = NULL
)

Arguments

ignore_case

logical. If FALSE the search is case sensitive. Default TRUE.

word_chars

character vector. Used to validate if a word continues. Default c(letters, LETTERS, 0:9, "_") equivalent to [a-zA-Z0-9_].

dict

list. Internally built character by character and needed for the search. It is recommended to leave the default value NULL.

Returns

invisible. Assign to a variable to inspect the output. Logical. TRUE if all went well.

Examples

library(rflashtext)

processor <- keyword_processor$new(ignore_case = FALSE, word_chars = letters)
processor


Method show_attrs()

Usage

keyword_processor$show_attrs(attrs = "all")

Arguments

attrs

character vector. Options are subsets of c("all", "id", "word_chars", "dict", "ignore_case", "dict_size"). Default "all".

Returns

list with the values of the attrs. Useful to save dict and reuse it or to check the dict_size.

Examples

library(rflashtext)

processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = "dict_size")
processor$show_attrs(attrs = "dict")


Method add_keys_words()

Usage

keyword_processor$add_keys_words(keys, words = NULL)

Arguments

keys

character vector. Strings to identify (find/replace) in the text.

words

character vector. Strings to be returned (find) or used as replacements (replace) when the respective keys are found. Should have the same length as keys. If not provided, words = keys.

Returns

invisible. Assign to a variable to inspect the output. Logical vector. FALSE for keys that are duplicated (already present in the dictionary); their respective words will be updated.

Examples

library(rflashtext)

processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
correct <- processor$add_keys_words(keys = c("NY", "CA"), words = c("New York City", "California"))
# To check if there are duplicate keys
correct


Method contain_keys()

Usage

keyword_processor$contain_keys(keys)

Arguments

keys

character vector. Strings to check whether they are already in the search dictionary.

Returns

logical vector. TRUE if the keys are in the search dictionary.

Examples

library(rflashtext)

processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))


Method get_words()

Usage

keyword_processor$get_words(keys)

Arguments

keys

character vector. Strings to get back the respective words.

Returns

character vector. Respective words. If a key is not found, NA_character_ is returned for it.

Examples

library(rflashtext)

processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))


Method find_keys()

Usage

keyword_processor$find_keys(sentence, span_info = TRUE)

Arguments

sentence

character. Text in which to find the previously defined keys. Not vectorized.

span_info

logical. TRUE to retrieve the words and the position of the matches. FALSE to only retrieve the words. Default TRUE.

Returns

list with the words corresponding to keys found in the sentence. Hint: Use do.call(rbind, ...) to transform the list to a matrix.

Examples

library(rflashtext)

processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentence = "I live in LA but I like NY")
do.call(rbind, words_found)


Method replace_keys()

Usage

keyword_processor$replace_keys(sentence)

Arguments

sentence

character. Text in which the keys found are replaced by the corresponding words. Not vectorized.

Returns

character. Text with the keys replaced by the respective words.

Examples

library(rflashtext)

processor <- keyword_processor$new()
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentence <- processor$replace_keys(sentence = "I live in LA but I like NY")
new_sentence

Examples

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))

processor$contain_keys(keys = "NY")
#> [1] TRUE
processor$get_words(keys = "LA")
#> [1] "Los Angeles"

processor$find_keys(sentence = "I live in LA but I like NY")
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles"
#> 
#> [[1]]$start
#> [1] 11
#> 
#> [[1]]$end
#> [1] 13
#> 
#> 
#> [[2]]
#> [[2]]$word
#> [1] "New York"
#> 
#> [[2]]$start
#> [1] 25
#> 
#> [[2]]$end
#> [1] 26
#> 
#> 
processor$replace_keys(sentence = "I live in LA but I like NY")
#> [1] "I live in Los Angeles but I like New York"

## ------------------------------------------------
## Method `keyword_processor$new`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new(ignore_case = FALSE, word_chars = letters)
#> Use `KeywordProcessor` instead for better performance
processor
#> <keyword_processor>
#>   Public:
#>     add_keys_words: function (keys, words = NULL) 
#>     contain_keys: function (keys) 
#>     find_keys: function (sentence, span_info = TRUE) 
#>     get_words: function (keys) 
#>     initialize: function (ignore_case = TRUE, word_chars = c(letters, LETTERS, 
#>     replace_keys: function (sentence) 
#>     show_attrs: function (attrs = "all") 
#>   Private:
#>     add_key_word: function (key, word) 
#>     attrs: list
#>     contain_key: function (key) 
#>     find_key: function (sentence, span_info) 
#>     get_word: function (key) 
#>     replace_key: function (sentence) 
#>     set_attr: function (id, ignore_case, word_chars, dict) 

## ------------------------------------------------
## Method `keyword_processor$show_attrs`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = "dict_size")
#> [1] 2
processor$show_attrs(attrs = "dict")
#> $`_class_`
#> [1] "keyword_dictionary"
#> 
#> $n
#> $n$y
#> $n$y$`_word_`
#> [1] "New York"
#> 
#> 
#> 
#> $l
#> $l$a
#> $l$a$`_word_`
#> [1] "Los Angeles"
#> 
#> 
#> 

## ------------------------------------------------
## Method `keyword_processor$add_keys_words`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
correct <- processor$add_keys_words(keys = c("NY", "CA"), words = c("New York City", "California"))
#> Warning: There are duplicate keys. To a better check assign the output to a variable.
# To check if there are duplicate keys
correct
#> [1] FALSE  TRUE

## ------------------------------------------------
## Method `keyword_processor$contain_keys`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$contain_keys(keys = c("NY", "LA", "TX"))
#> [1]  TRUE  TRUE FALSE

## ------------------------------------------------
## Method `keyword_processor$get_words`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$get_words(keys = c("NY", "LA", "TX"))
#> [1] "New York"    "Los Angeles" NA           

## ------------------------------------------------
## Method `keyword_processor$find_keys`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
words_found <- processor$find_keys(sentence = "I live in LA but I like NY")
do.call(rbind, words_found)
#>      word          start end
#> [1,] "Los Angeles" 11    13 
#> [2,] "New York"    25    26 

## ------------------------------------------------
## Method `keyword_processor$replace_keys`
## ------------------------------------------------

library(rflashtext)

processor <- keyword_processor$new()
#> Use `KeywordProcessor` instead for better performance
processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
new_sentence <- processor$replace_keys(sentence = "I live in LA but I like NY")
new_sentence
#> [1] "I live in Los Angeles but I like New York"