前言
stringr包提供了一组内部一致的工具,用于处理字符串,即被引号包围的字符序列。
stringr中的pattern
参数会先解析字符串中的特殊字符,然后才被解释为正则表达式。
在R中,正则表达式要写成字符串,即由双引号(""
)或单引号(''
)包围的字符序列。
有些字符不能直接在R字符串中表示。这些字符必须表示为特殊字符,即具有特定含义的字符序列。
运行?"'"
可查看完整列表
因此,每当正则表达式中出现一个\
时,就必须在字符串中将其写为\\
以表示正则表达式。
正则表达式
正则表达式是描述字符串中模式的简明语言。
匹配字符
see <- function(rx) str_view_all("abc ABC 123\t.!?\\(){}\n", rx)
see <- function(rx) str_view_all("abc ABC 123\t.!?\\(){}\n", rx)
选择
alt <- function(rx) str_view_all("abcde", rx)
alt <- function(rx) str_view_all("abcde", rx)
锚定
anchor <- function(rx) str_view_all("aaa", rx)
anchor <- function(rx) str_view_all("aaa", rx)
重复
quant <- function(rx) str_view_all(".a.aa.aaa", rx)
quant <- function(rx) str_view_all(".a.aa.aaa", rx)
分组
使用括号设置优先级(求值顺序)并创建分组
应用
匹配检测
str_detect()
检测字符串中是否存在模式匹配。
fruit
## [1] "apple" "apricot" "avocado"
## [4] "banana" "bell pepper" "bilberry"
## [7] "blackberry" "blackcurrant" "blood orange"
## [10] "blueberry" "boysenberry" "breadfruit"
## [13] "canary melon" "cantaloupe" "cherimoya"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cranberry"
## [22] "cucumber" "currant" "damson"
## [25] "date" "dragonfruit" "durian"
## [28] "eggplant" "elderberry" "feijoa"
## [31] "fig" "goji berry" "gooseberry"
## [34] "grape" "grapefruit" "guava"
## [37] "honeydew" "huckleberry" "jackfruit"
## [40] "jambul" "jujube" "kiwi fruit"
## [43] "kumquat" "lemon" "lime"
## [46] "loquat" "lychee" "mandarine"
## [49] "mango" "mulberry" "nectarine"
## [52] "nut" "olive" "orange"
## [55] "pamelo" "papaya" "passionfruit"
## [58] "peach" "pear" "persimmon"
## [61] "physalis" "pineapple" "plum"
## [64] "pomegranate" "pomelo" "purple mangosteen"
## [67] "quince" "raisin" "rambutan"
## [70] "raspberry" "redcurrant" "rock melon"
## [73] "salal berry" "satsuma" "star fruit"
## [76] "strawberry" "tamarillo" "tangerine"
## [79] "ugli fruit" "watermelon"
str_detect(fruit, "a")
## [1] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE
## [13] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE
## [37] FALSE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE
## [49] TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [61] TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
fruit
## [1] "apple" "apricot" "avocado"
## [4] "banana" "bell pepper" "bilberry"
## [7] "blackberry" "blackcurrant" "blood orange"
## [10] "blueberry" "boysenberry" "breadfruit"
## [13] "canary melon" "cantaloupe" "cherimoya"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cranberry"
## [22] "cucumber" "currant" "damson"
## [25] "date" "dragonfruit" "durian"
## [28] "eggplant" "elderberry" "feijoa"
## [31] "fig" "goji berry" "gooseberry"
## [34] "grape" "grapefruit" "guava"
## [37] "honeydew" "huckleberry" "jackfruit"
## [40] "jambul" "jujube" "kiwi fruit"
## [43] "kumquat" "lemon" "lime"
## [46] "loquat" "lychee" "mandarine"
## [49] "mango" "mulberry" "nectarine"
## [52] "nut" "olive" "orange"
## [55] "pamelo" "papaya" "passionfruit"
## [58] "peach" "pear" "persimmon"
## [61] "physalis" "pineapple" "plum"
## [64] "pomegranate" "pomelo" "purple mangosteen"
## [67] "quince" "raisin" "rambutan"
## [70] "raspberry" "redcurrant" "rock melon"
## [73] "salal berry" "satsuma" "star fruit"
## [76] "strawberry" "tamarillo" "tangerine"
## [79] "ugli fruit" "watermelon"
str_detect(fruit, "a")
## [1] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE
## [13] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE
## [37] FALSE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE
## [49] TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [61] TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
str_which()
查找包含模式匹配的字符串的索引。
str_which(fruit, "a")
## [1] 1 2 3 4 7 8 9 12 13 14 15 21 23 24 25 26 27 28 30 34 35 36 39 40 43
## [26] 46 48 49 51 54 55 56 57 58 59 61 62 64 66 68 69 70 71 73 74 75 76 77 78 80
str_which(fruit, "a")
## [1] 1 2 3 4 7 8 9 12 13 14 15 21 23 24 25 26 27 28 30 34 35 36 39 40 43
## [26] 46 48 49 51 54 55 56 57 58 59 61 62 64 66 68 69 70 71 73 74 75 76 77 78 80
str_count()
统计字符串中的匹配数。
str_count(fruit, "a")
## [1] 1 1 2 3 0 0 1 2 1 0 0 1 2 2 1 0 0 0 0 0 1 0 1 1 1 1 1 1 0 1 0 0 0 1 1 2 0 0
## [39] 1 1 0 0 1 0 0 1 0 2 1 0 1 0 0 1 1 3 1 1 1 0 1 1 0 2 0 1 0 1 2 1 1 0 2 2 1 1
## [77] 2 1 0 1
str_count(fruit, "a")
## [1] 1 1 2 3 0 0 1 2 1 0 0 1 2 2 1 0 0 0 0 0 1 0 1 1 1 1 1 1 0 1 0 0 0 1 1 2 0 0
## [39] 1 1 0 0 1 0 0 1 0 2 1 0 1 0 0 1 1 3 1 1 1 0 1 1 0 2 0 1 0 1 2 1 1 0 2 2 1 1
## [77] 2 1 0 1
str_locate()
在字符串中定位模式首次匹配的位置。str_locate_all()
定位模式所有匹配的位置。
head(str_locate(fruit, "a"))
## start end
## [1,] 1 1
## [2,] 1 1
## [3,] 1 1
## [4,] 2 2
## [5,] NA NA
## [6,] NA NA
head(str_locate_all(fruit, "a"))
## [[1]]
## start end
## [1,] 1 1
##
## [[2]]
## start end
## [1,] 1 1
##
## [[3]]
## start end
## [1,] 1 1
## [2,] 5 5
##
## [[4]]
## start end
## [1,] 2 2
## [2,] 4 4
## [3,] 6 6
##
## [[5]]
## start end
##
## [[6]]
## start end
head(str_locate(fruit, "a"))
## start end
## [1,] 1 1
## [2,] 1 1
## [3,] 1 1
## [4,] 2 2
## [5,] NA NA
## [6,] NA NA
head(str_locate_all(fruit, "a"))
## [[1]]
## start end
## [1,] 1 1
##
## [[2]]
## start end
## [1,] 1 1
##
## [[3]]
## start end
## [1,] 1 1
## [2,] 5 5
##
## [[4]]
## start end
## [1,] 2 2
## [2,] 4 4
## [3,] 6 6
##
## [[5]]
## start end
##
## [[6]]
## start end
字符串取子集
str_sub()
从字符向量中提取子串。
str_sub(fruit, 1, 3)
## [1] "app" "apr" "avo" "ban" "bel" "bil" "bla" "bla" "blo" "blu" "boy" "bre"
## [13] "can" "can" "che" "che" "chi" "cle" "clo" "coc" "cra" "cuc" "cur" "dam"
## [25] "dat" "dra" "dur" "egg" "eld" "fei" "fig" "goj" "goo" "gra" "gra" "gua"
## [37] "hon" "huc" "jac" "jam" "juj" "kiw" "kum" "lem" "lim" "loq" "lyc" "man"
## [49] "man" "mul" "nec" "nut" "oli" "ora" "pam" "pap" "pas" "pea" "pea" "per"
## [61] "phy" "pin" "plu" "pom" "pom" "pur" "qui" "rai" "ram" "ras" "red" "roc"
## [73] "sal" "sat" "sta" "str" "tam" "tan" "ugl" "wat"
str_sub(fruit, -2)
## [1] "le" "ot" "do" "na" "er" "ry" "ry" "nt" "ge" "ry" "ry" "it" "on" "pe" "ya"
## [16] "ry" "er" "ne" "ry" "ut" "ry" "er" "nt" "on" "te" "it" "an" "nt" "ry" "oa"
## [31] "ig" "ry" "ry" "pe" "it" "va" "ew" "ry" "it" "ul" "be" "it" "at" "on" "me"
## [46] "at" "ee" "ne" "go" "ry" "ne" "ut" "ve" "ge" "lo" "ya" "it" "ch" "ar" "on"
## [61] "is" "le" "um" "te" "lo" "en" "ce" "in" "an" "ry" "nt" "on" "ry" "ma" "it"
## [76] "ry" "lo" "ne" "it" "on"
str_sub(fruit, 1, 3)
## [1] "app" "apr" "avo" "ban" "bel" "bil" "bla" "bla" "blo" "blu" "boy" "bre"
## [13] "can" "can" "che" "che" "chi" "cle" "clo" "coc" "cra" "cuc" "cur" "dam"
## [25] "dat" "dra" "dur" "egg" "eld" "fei" "fig" "goj" "goo" "gra" "gra" "gua"
## [37] "hon" "huc" "jac" "jam" "juj" "kiw" "kum" "lem" "lim" "loq" "lyc" "man"
## [49] "man" "mul" "nec" "nut" "oli" "ora" "pam" "pap" "pas" "pea" "pea" "per"
## [61] "phy" "pin" "plu" "pom" "pom" "pur" "qui" "rai" "ram" "ras" "red" "roc"
## [73] "sal" "sat" "sta" "str" "tam" "tan" "ugl" "wat"
str_sub(fruit, -2)
## [1] "le" "ot" "do" "na" "er" "ry" "ry" "nt" "ge" "ry" "ry" "it" "on" "pe" "ya"
## [16] "ry" "er" "ne" "ry" "ut" "ry" "er" "nt" "on" "te" "it" "an" "nt" "ry" "oa"
## [31] "ig" "ry" "ry" "pe" "it" "va" "ew" "ry" "it" "ul" "be" "it" "at" "on" "me"
## [46] "at" "ee" "ne" "go" "ry" "ne" "ut" "ve" "ge" "lo" "ya" "it" "ch" "ar" "on"
## [61] "is" "le" "um" "te" "lo" "en" "ce" "in" "an" "ry" "nt" "on" "ry" "ma" "it"
## [76] "ry" "lo" "ne" "it" "on"
str_subset()
只返回包含模式匹配的字符串。
str_subset(fruit, "b")
## [1] "banana" "bell pepper" "bilberry" "blackberry" "blackcurrant"
## [6] "blood orange" "blueberry" "boysenberry" "breadfruit" "cloudberry"
## [11] "cranberry" "cucumber" "elderberry" "goji berry" "gooseberry"
## [16] "huckleberry" "jambul" "jujube" "mulberry" "rambutan"
## [21] "raspberry" "salal berry" "strawberry"
str_subset(fruit, "b")
## [1] "banana" "bell pepper" "bilberry" "blackberry" "blackcurrant"
## [6] "blood orange" "blueberry" "boysenberry" "breadfruit" "cloudberry"
## [11] "cranberry" "cucumber" "elderberry" "goji berry" "gooseberry"
## [16] "huckleberry" "jambul" "jujube" "mulberry" "rambutan"
## [21] "raspberry" "salal berry" "strawberry"
str_extract()
返回每个字符串中找到的第一个模式匹配,作为向量。而str_extract_all()
返回每个字符串中的所有模式匹配(结果为列表)。
str_extract(fruit, "[aeiou]")
## [1] "a" "a" "a" "a" "e" "i" "a" "a" "o" "u" "o" "e" "a" "a" "e" "e" "i" "e" "o"
## [20] "o" "a" "u" "u" "a" "a" "a" "u" "e" "e" "e" "i" "o" "o" "a" "a" "u" "o" "u"
## [39] "a" "a" "u" "i" "u" "e" "i" "o" "e" "a" "a" "u" "e" "u" "o" "o" "a" "a" "a"
## [58] "e" "e" "e" "a" "i" "u" "o" "o" "u" "u" "a" "a" "a" "e" "o" "a" "a" "a" "a"
## [77] "a" "a" "u" "a"
## [1] "a" "a" "a" "a" "e" "i" "a" "a" "o" "u" "o" "e" "a" "a" "e" "e" "i" "e" "o"
## [20] "o" "a" "u" "u" "a" "a" "a" "u" "e" "e" "e" "i" "o" "o" "a" "a" "u" "o" "u"
## [39] "a" "a" "u" "i" "u" "e" "i" "o" "e" "a" "a" "u" "e" "u" "o" "o" "a" "a" "a"
## [58] "e" "e" "e" "a" "i" "u" "o" "o" "u" "u" "a" "a" "a" "e" "o" "a" "a" "a" "a"
## [77] "a" "a" "u" "a"
管理长度
str_length()
返回字符串的长度
str_length(fruit)
## [1] 5 7 7 6 11 8 10 12 12 9 11 10 12 10 9 6 12 10 10 7 9 8 7 6 4
## [26] 11 6 8 10 6 3 10 10 5 10 5 8 11 9 6 6 10 7 5 4 6 6 9 5 8
## [51] 9 3 5 6 6 6 12 5 4 9 8 9 4 11 6 17 6 6 8 9 10 10 11 7 10
## [76] 10 9 9 10 10
str_length(fruit)
## [1] 5 7 7 6 11 8 10 12 12 9 11 10 12 10 9 6 12 10 10 7 9 8 7 6 4
## [26] 11 6 8 10 6 3 10 10 5 10 5 8 11 9 6 6 10 7 5 4 6 6 9 5 8
## [51] 9 3 5 6 6 6 12 5 4 9 8 9 4 11 6 17 6 6 8 9 10 10 11 7 10
## [76] 10 9 9 10 10
str_pad()
将字符串填充到恒定长度。
str_pad(fruit, 17)
## [1] " apple" " apricot" " avocado"
## [4] " banana" " bell pepper" " bilberry"
## [7] " blackberry" " blackcurrant" " blood orange"
## [10] " blueberry" " boysenberry" " breadfruit"
## [13] " canary melon" " cantaloupe" " cherimoya"
## [16] " cherry" " chili pepper" " clementine"
## [19] " cloudberry" " coconut" " cranberry"
## [22] " cucumber" " currant" " damson"
## [25] " date" " dragonfruit" " durian"
## [28] " eggplant" " elderberry" " feijoa"
## [31] " fig" " goji berry" " gooseberry"
## [34] " grape" " grapefruit" " guava"
## [37] " honeydew" " huckleberry" " jackfruit"
## [40] " jambul" " jujube" " kiwi fruit"
## [43] " kumquat" " lemon" " lime"
## [46] " loquat" " lychee" " mandarine"
## [49] " mango" " mulberry" " nectarine"
## [52] " nut" " olive" " orange"
## [55] " pamelo" " papaya" " passionfruit"
## [58] " peach" " pear" " persimmon"
## [61] " physalis" " pineapple" " plum"
## [64] " pomegranate" " pomelo" "purple mangosteen"
## [67] " quince" " raisin" " rambutan"
## [70] " raspberry" " redcurrant" " rock melon"
## [73] " salal berry" " satsuma" " star fruit"
## [76] " strawberry" " tamarillo" " tangerine"
## [79] " ugli fruit" " watermelon"
str_pad(fruit, 17)
## [1] " apple" " apricot" " avocado"
## [4] " banana" " bell pepper" " bilberry"
## [7] " blackberry" " blackcurrant" " blood orange"
## [10] " blueberry" " boysenberry" " breadfruit"
## [13] " canary melon" " cantaloupe" " cherimoya"
## [16] " cherry" " chili pepper" " clementine"
## [19] " cloudberry" " coconut" " cranberry"
## [22] " cucumber" " currant" " damson"
## [25] " date" " dragonfruit" " durian"
## [28] " eggplant" " elderberry" " feijoa"
## [31] " fig" " goji berry" " gooseberry"
## [34] " grape" " grapefruit" " guava"
## [37] " honeydew" " huckleberry" " jackfruit"
## [40] " jambul" " jujube" " kiwi fruit"
## [43] " kumquat" " lemon" " lime"
## [46] " loquat" " lychee" " mandarine"
## [49] " mango" " mulberry" " nectarine"
## [52] " nut" " olive" " orange"
## [55] " pamelo" " papaya" " passionfruit"
## [58] " peach" " pear" " persimmon"
## [61] " physalis" " pineapple" " plum"
## [64] " pomegranate" " pomelo" "purple mangosteen"
## [67] " quince" " raisin" " rambutan"
## [70] " raspberry" " redcurrant" " rock melon"
## [73] " salal berry" " satsuma" " star fruit"
## [76] " strawberry" " tamarillo" " tangerine"
## [79] " ugli fruit" " watermelon"
str_trunc()
截断字符串的长度,用省略号替换内容。
str_trunc(fruit, 3)
## [1] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [13] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [25] "..." "..." "..." "..." "..." "..." "fig" "..." "..." "..." "..." "..."
## [37] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [49] "..." "..." "..." "nut" "..." "..." "..." "..." "..." "..." "..." "..."
## [61] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [73] "..." "..." "..." "..." "..." "..." "..." "..."
str_trunc(fruit, 3)
## [1] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [13] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [25] "..." "..." "..." "..." "..." "..." "fig" "..." "..." "..." "..." "..."
## [37] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [49] "..." "..." "..." "nut" "..." "..." "..." "..." "..." "..." "..." "..."
## [61] "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..." "..."
## [73] "..." "..." "..." "..." "..." "..." "..." "..."
str_trim()
从字符串的开始和(或者)结束位置去除空白。参数side = "both"
则从两侧同时修剪,而参数side = "left"
,side = "right"
分别仅从开始和结尾位置去除空白。
str_trim(fruit)
## [1] "apple" "apricot" "avocado"
## [4] "banana" "bell pepper" "bilberry"
## [7] "blackberry" "blackcurrant" "blood orange"
## [10] "blueberry" "boysenberry" "breadfruit"
## [13] "canary melon" "cantaloupe" "cherimoya"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cranberry"
## [22] "cucumber" "currant" "damson"
## [25] "date" "dragonfruit" "durian"
## [28] "eggplant" "elderberry" "feijoa"
## [31] "fig" "goji berry" "gooseberry"
## [34] "grape" "grapefruit" "guava"
## [37] "honeydew" "huckleberry" "jackfruit"
## [40] "jambul" "jujube" "kiwi fruit"
## [43] "kumquat" "lemon" "lime"
## [46] "loquat" "lychee" "mandarine"
## [49] "mango" "mulberry" "nectarine"
## [52] "nut" "olive" "orange"
## [55] "pamelo" "papaya" "passionfruit"
## [58] "peach" "pear" "persimmon"
## [61] "physalis" "pineapple" "plum"
## [64] "pomegranate" "pomelo" "purple mangosteen"
## [67] "quince" "raisin" "rambutan"
## [70] "raspberry" "redcurrant" "rock melon"
## [73] "salal berry" "satsuma" "star fruit"
## [76] "strawberry" "tamarillo" "tangerine"
## [79] "ugli fruit" "watermelon"
str_trim(fruit)
## [1] "apple" "apricot" "avocado"
## [4] "banana" "bell pepper" "bilberry"
## [7] "blackberry" "blackcurrant" "blood orange"
## [10] "blueberry" "boysenberry" "breadfruit"
## [13] "canary melon" "cantaloupe" "cherimoya"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cranberry"
## [22] "cucumber" "currant" "damson"
## [25] "date" "dragonfruit" "durian"
## [28] "eggplant" "elderberry" "feijoa"
## [31] "fig" "goji berry" "gooseberry"
## [34] "grape" "grapefruit" "guava"
## [37] "honeydew" "huckleberry" "jackfruit"
## [40] "jambul" "jujube" "kiwi fruit"
## [43] "kumquat" "lemon" "lime"
## [46] "loquat" "lychee" "mandarine"
## [49] "mango" "mulberry" "nectarine"
## [52] "nut" "olive" "orange"
## [55] "pamelo" "papaya" "passionfruit"
## [58] "peach" "pear" "persimmon"
## [61] "physalis" "pineapple" "plum"
## [64] "pomegranate" "pomelo" "purple mangosteen"
## [67] "quince" "raisin" "rambutan"
## [70] "raspberry" "redcurrant" "rock melon"
## [73] "salal berry" "satsuma" "star fruit"
## [76] "strawberry" "tamarillo" "tangerine"
## [79] "ugli fruit" "watermelon"
修改字符串
通过用str_sub()
标识子字符串并对其结果赋值,来替换子字符串。
str_sub(fruit, 1, 3) <- "str"
head(fruit)
## [1] "strle" "stricot" "strcado" "strana" "strl pepper"
## [6] "strberry"
str_sub(fruit, 1, 3) <- "str"
head(fruit)
## [1] "strle" "stricot" "strcado" "strana" "strl pepper"
## [6] "strberry"
str_replace()
替换每个字符串中的第一个匹配模式。
str_replace(fruit, "a", "-")
## [1] "-pple" "-pricot" "-vocado"
## [4] "b-nana" "bell pepper" "bilberry"
## [7] "bl-ckberry" "bl-ckcurrant" "blood or-nge"
## [10] "blueberry" "boysenberry" "bre-dfruit"
## [13] "c-nary melon" "c-ntaloupe" "cherimoy-"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cr-nberry"
## [22] "cucumber" "curr-nt" "d-mson"
## [25] "d-te" "dr-gonfruit" "duri-n"
## [28] "eggpl-nt" "elderberry" "feijo-"
## [31] "fig" "goji berry" "gooseberry"
## [34] "gr-pe" "gr-pefruit" "gu-va"
## [37] "honeydew" "huckleberry" "j-ckfruit"
## [40] "j-mbul" "jujube" "kiwi fruit"
## [43] "kumqu-t" "lemon" "lime"
## [46] "loqu-t" "lychee" "m-ndarine"
## [49] "m-ngo" "mulberry" "nect-rine"
## [52] "nut" "olive" "or-nge"
## [55] "p-melo" "p-paya" "p-ssionfruit"
## [58] "pe-ch" "pe-r" "persimmon"
## [61] "phys-lis" "pine-pple" "plum"
## [64] "pomegr-nate" "pomelo" "purple m-ngosteen"
## [67] "quince" "r-isin" "r-mbutan"
## [70] "r-spberry" "redcurr-nt" "rock melon"
## [73] "s-lal berry" "s-tsuma" "st-r fruit"
## [76] "str-wberry" "t-marillo" "t-ngerine"
## [79] "ugli fruit" "w-termelon"
str_replace(fruit, "a", "-")
## [1] "-pple" "-pricot" "-vocado"
## [4] "b-nana" "bell pepper" "bilberry"
## [7] "bl-ckberry" "bl-ckcurrant" "blood or-nge"
## [10] "blueberry" "boysenberry" "bre-dfruit"
## [13] "c-nary melon" "c-ntaloupe" "cherimoy-"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cr-nberry"
## [22] "cucumber" "curr-nt" "d-mson"
## [25] "d-te" "dr-gonfruit" "duri-n"
## [28] "eggpl-nt" "elderberry" "feijo-"
## [31] "fig" "goji berry" "gooseberry"
## [34] "gr-pe" "gr-pefruit" "gu-va"
## [37] "honeydew" "huckleberry" "j-ckfruit"
## [40] "j-mbul" "jujube" "kiwi fruit"
## [43] "kumqu-t" "lemon" "lime"
## [46] "loqu-t" "lychee" "m-ndarine"
## [49] "m-ngo" "mulberry" "nect-rine"
## [52] "nut" "olive" "or-nge"
## [55] "p-melo" "p-paya" "p-ssionfruit"
## [58] "pe-ch" "pe-r" "persimmon"
## [61] "phys-lis" "pine-pple" "plum"
## [64] "pomegr-nate" "pomelo" "purple m-ngosteen"
## [67] "quince" "r-isin" "r-mbutan"
## [70] "r-spberry" "redcurr-nt" "rock melon"
## [73] "s-lal berry" "s-tsuma" "st-r fruit"
## [76] "str-wberry" "t-marillo" "t-ngerine"
## [79] "ugli fruit" "w-termelon"
str_replace_all()
替换每个字符串中所有匹配的模式。
str_replace_all(fruit, "a", "-")
## [1] "-pple" "-pricot" "-voc-do"
## [4] "b-n-n-" "bell pepper" "bilberry"
## [7] "bl-ckberry" "bl-ckcurr-nt" "blood or-nge"
## [10] "blueberry" "boysenberry" "bre-dfruit"
## [13] "c-n-ry melon" "c-nt-loupe" "cherimoy-"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cr-nberry"
## [22] "cucumber" "curr-nt" "d-mson"
## [25] "d-te" "dr-gonfruit" "duri-n"
## [28] "eggpl-nt" "elderberry" "feijo-"
## [31] "fig" "goji berry" "gooseberry"
## [34] "gr-pe" "gr-pefruit" "gu-v-"
## [37] "honeydew" "huckleberry" "j-ckfruit"
## [40] "j-mbul" "jujube" "kiwi fruit"
## [43] "kumqu-t" "lemon" "lime"
## [46] "loqu-t" "lychee" "m-nd-rine"
## [49] "m-ngo" "mulberry" "nect-rine"
## [52] "nut" "olive" "or-nge"
## [55] "p-melo" "p-p-y-" "p-ssionfruit"
## [58] "pe-ch" "pe-r" "persimmon"
## [61] "phys-lis" "pine-pple" "plum"
## [64] "pomegr-n-te" "pomelo" "purple m-ngosteen"
## [67] "quince" "r-isin" "r-mbut-n"
## [70] "r-spberry" "redcurr-nt" "rock melon"
## [73] "s-l-l berry" "s-tsum-" "st-r fruit"
## [76] "str-wberry" "t-m-rillo" "t-ngerine"
## [79] "ugli fruit" "w-termelon"
str_replace_all(fruit, "a", "-")
## [1] "-pple" "-pricot" "-voc-do"
## [4] "b-n-n-" "bell pepper" "bilberry"
## [7] "bl-ckberry" "bl-ckcurr-nt" "blood or-nge"
## [10] "blueberry" "boysenberry" "bre-dfruit"
## [13] "c-n-ry melon" "c-nt-loupe" "cherimoy-"
## [16] "cherry" "chili pepper" "clementine"
## [19] "cloudberry" "coconut" "cr-nberry"
## [22] "cucumber" "curr-nt" "d-mson"
## [25] "d-te" "dr-gonfruit" "duri-n"
## [28] "eggpl-nt" "elderberry" "feijo-"
## [31] "fig" "goji berry" "gooseberry"
## [34] "gr-pe" "gr-pefruit" "gu-v-"
## [37] "honeydew" "huckleberry" "j-ckfruit"
## [40] "j-mbul" "jujube" "kiwi fruit"
## [43] "kumqu-t" "lemon" "lime"
## [46] "loqu-t" "lychee" "m-nd-rine"
## [49] "m-ngo" "mulberry" "nect-rine"
## [52] "nut" "olive" "or-nge"
## [55] "p-melo" "p-p-y-" "p-ssionfruit"
## [58] "pe-ch" "pe-r" "persimmon"
## [61] "phys-lis" "pine-pple" "plum"
## [64] "pomegr-n-te" "pomelo" "purple m-ngosteen"
## [67] "quince" "r-isin" "r-mbut-n"
## [70] "r-spberry" "redcurr-nt" "rock melon"
## [73] "s-l-l berry" "s-tsum-" "st-r fruit"
## [76] "str-wberry" "t-m-rillo" "t-ngerine"
## [79] "ugli fruit" "w-termelon"
str_to_lower()
将字符串转换为小写。
head(str_to_lower(sentences))
## [1] "the birch canoe slid on the smooth planks."
## [2] "glue the sheet to the dark blue background."
## [3] "it's easy to tell the depth of a well."
## [4] "these days a chicken leg is a rare dish."
## [5] "rice is often served in round bowls."
## [6] "the juice of lemons makes fine punch."
head(str_to_lower(sentences))
## [1] "the birch canoe slid on the smooth planks."
## [2] "glue the sheet to the dark blue background."
## [3] "it's easy to tell the depth of a well."
## [4] "these days a chicken leg is a rare dish."
## [5] "rice is often served in round bowls."
## [6] "the juice of lemons makes fine punch."
str_to_upper()
将字符串转换为大写。
head(str_to_upper(sentences))
## [1] "THE BIRCH CANOE SLID ON THE SMOOTH PLANKS."
## [2] "GLUE THE SHEET TO THE DARK BLUE BACKGROUND."
## [3] "IT'S EASY TO TELL THE DEPTH OF A WELL."
## [4] "THESE DAYS A CHICKEN LEG IS A RARE DISH."
## [5] "RICE IS OFTEN SERVED IN ROUND BOWLS."
## [6] "THE JUICE OF LEMONS MAKES FINE PUNCH."
head(str_to_upper(sentences))
## [1] "THE BIRCH CANOE SLID ON THE SMOOTH PLANKS."
## [2] "GLUE THE SHEET TO THE DARK BLUE BACKGROUND."
## [3] "IT'S EASY TO TELL THE DEPTH OF A WELL."
## [4] "THESE DAYS A CHICKEN LEG IS A RARE DISH."
## [5] "RICE IS OFTEN SERVED IN ROUND BOWLS."
## [6] "THE JUICE OF LEMONS MAKES FINE PUNCH."
str_to_title()
将字符串转换为标题大小写。
head(str_to_title(sentences))
## [1] "The Birch Canoe Slid On The Smooth Planks."
## [2] "Glue The Sheet To The Dark Blue Background."
## [3] "It's Easy To Tell The Depth Of A Well."
## [4] "These Days A Chicken Leg Is A Rare Dish."
## [5] "Rice Is Often Served In Round Bowls."
## [6] "The Juice Of Lemons Makes Fine Punch."
head(str_to_title(sentences))
## [1] "The Birch Canoe Slid On The Smooth Planks."
## [2] "Glue The Sheet To The Dark Blue Background."
## [3] "It's Easy To Tell The Depth Of A Well."
## [4] "These Days A Chicken Leg Is A Rare Dish."
## [5] "Rice Is Often Served In Round Bowls."
## [6] "The Juice Of Lemons Makes Fine Punch."
合并和切分
str_c()
将多个字符串合并为一个字符串
str_c(letters, LETTERS)
## [1] "aA" "bB" "cC" "dD" "eE" "fF" "gG" "hH" "iI" "jJ" "kK" "lL" "mM" "nN" "oO"
## [16] "pP" "qQ" "rR" "sS" "tT" "uU" "vV" "wW" "xX" "yY" "zZ"
str_c(letters, LETTERS)
## [1] "aA" "bB" "cC" "dD" "eE" "fF" "gG" "hH" "iI" "jJ" "kK" "lL" "mM" "nN" "oO"
## [16] "pP" "qQ" "rR" "sS" "tT" "uU" "vV" "wW" "xX" "yY" "zZ"
str_c()
将字符串向量折叠成单个字符串。
str_c(letters, collapse = "")
## [1] "abcdefghijklmnopqrstuvwxyz"
str_c(letters, collapse = "")
## [1] "abcdefghijklmnopqrstuvwxyz"
str_dup()
将字符串重复指定的次数。
head(str_dup(fruit, times = 2))
## [1] "appleapple" "apricotapricot" "avocadoavocado"
## [4] "bananabanana" "bell pepperbell pepper" "bilberrybilberry"
head(str_dup(fruit, times = 2))
## [1] "appleapple" "apricotapricot" "avocadoavocado"
## [4] "bananabanana" "bell pepperbell pepper" "bilberrybilberry"
str_split_fixed()
将字符串向量拆分为子字符串矩阵(在出现模式匹配时拆分)。而str_split()
返回一个子字符串列表。
head(str_split_fixed(fruit, " ", n=2))
## [,1] [,2]
## [1,] "apple" ""
## [2,] "apricot" ""
## [3,] "avocado" ""
## [4,] "banana" ""
## [5,] "bell" "pepper"
## [6,] "bilberry" ""
head(str_split(fruit, " ", n=2))
## [[1]]
## [1] "apple"
##
## [[2]]
## [1] "apricot"
##
## [[3]]
## [1] "avocado"
##
## [[4]]
## [1] "banana"
##
## [[5]]
## [1] "bell" "pepper"
##
## [[6]]
## [1] "bilberry"
head(str_split_fixed(fruit, " ", n=2))
## [,1] [,2]
## [1,] "apple" ""
## [2,] "apricot" ""
## [3,] "avocado" ""
## [4,] "banana" ""
## [5,] "bell" "pepper"
## [6,] "bilberry" ""
head(str_split(fruit, " ", n=2))
## [[1]]
## [1] "apple"
##
## [[2]]
## [1] "apricot"
##
## [[3]]
## [1] "avocado"
##
## [[4]]
## [1] "banana"
##
## [[5]]
## [1] "bell" "pepper"
##
## [[6]]
## [1] "bilberry"
str_glue()
根据字符串和待求值的{表达式}创建一个字符串。
str_glue("Pi is {pi}")
## Pi is 3.14159265358979
str_glue("Pi is {pi}")
## Pi is 3.14159265358979
使用数据框、列表或环境,根据字符串和待求值的{表达式}创建字符串。
head(str_glue_data(mtcars, "{rownames(mtcars)} has {hp} hp"))
## Mazda RX4 has 110 hp
## Mazda RX4 Wag has 110 hp
## Datsun 710 has 93 hp
## Hornet 4 Drive has 110 hp
## Hornet Sportabout has 175 hp
## Valiant has 105 hp
head(str_glue_data(mtcars, "{rownames(mtcars)} has {hp} hp"))
## Mazda RX4 has 110 hp
## Mazda RX4 Wag has 110 hp
## Datsun 710 has 93 hp
## Hornet 4 Drive has 110 hp
## Hornet Sportabout has 175 hp
## Valiant has 105 hp
排序字符串
str_order()
返回排序字符向量的索引向量。
x <- c('c','b','a')
x[str_order(x)]
## [1] "a" "b" "c"
x <- c('c','b','a')
x[str_order(x)]
## [1] "a" "b" "c"
str_sort()
排序字符向量
str_sort(x)
## [1] "a" "b" "c"
str_sort(x)
## [1] "a" "b" "c"
助手
str_conv()
覆盖(重新指定)字符串的编码。
head(str_conv(fruit,"ISO-8859-1"))
## [1] "apple" "apricot" "avocado" "banana" "bell pepper"
## [6] "bilberry"
head(str_conv(fruit,"ISO-8859-1"))
## [1] "apple" "apricot" "avocado" "banana" "bell pepper"
## [6] "bilberry"
str_view()
查看每个字符串中第一个正则表达式匹配项的HTML呈现。
str_view(fruit[1:10], "[aeiou]")
str_view(fruit[1:10], "[aeiou]")
str_view_all()
查看所有正则表达式匹配的HTML呈现。
str_view_all(fruit[1:10], "[aeiou]")
str_view_all(fruit[1:10], "[aeiou]")
参考:
https://rstudio.com/resources/cheatsheets/