9  regex

9.1 Select/Remove Items

9.1.1 Select

# Create three objects whose names all contain the text "pattern".
thispatternexample <- "example1"
patternexample     <- "example2"
thispattern        <- "example3"
# ls(pattern = ) lists the object names that match a regular expression and
# mget() returns those objects as a named list. Without anchors, "pattern"
# may appear anywhere in the name.
mget(ls(pattern = "pattern"))  # pattern included anywhere within the name
$patternexample
[1] "example2"

$thispattern
[1] "example3"

$thispatternexample
[1] "example1"
mget(ls(pattern = "^pattern")) # ^ anchors the match: name starts with "pattern"
$patternexample
[1] "example2"
mget(ls(pattern = "pattern$")) # $ anchors the match: name ends with "pattern"
$thispattern
[1] "example3"

9.1.2 Remove

# Each call removes every object whose name matches the regular expression,
# so check what will be removed with ls(pattern = ...) first.
rm(list = ls(pattern = "pattern")) # name contains "pattern"
rm(list = ls(pattern = "^prefix")) # name starts with "prefix"
rm(list = ls(pattern = "suffix$")) # name ends with "suffix"

9.1.3 Special Characters (like .)

# Example data frames: three have a literal "." right after "df", one does not.
df.1_toprows   <- mtcars[1:2,]
df.2_middle    <- mtcars[3:4,]
df.3_otherrows <- mtcars[5:6,] 
df4other       <- mtcars[7:8,]

# anything that starts with df
mget(ls(pattern="^df")) |> names()
[1] "df.1_toprows"   "df.2_middle"    "df.3_otherrows" "df4other"      
# an unescaped . is a regex wildcard (matches any single character), so
# "^df." still matches df4other; the dot must be escaped to be taken literally
mget(ls(pattern="^df.")) |> names()
[1] "df.1_toprows"   "df.2_middle"    "df.3_otherrows" "df4other"      
# use a double backslash (\\.) to match a literal . instead of any character
mget(ls(pattern="^df\\.")) |> names()
[1] "df.1_toprows"   "df.2_middle"    "df.3_otherrows"
# bind_rows() can stack all of the matched data frames into a single one
# (intended for data frames that share the same variables/columns)
dplyr::bind_rows(mget(ls(pattern="^df\\.")))
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

9.2 Multiple Patterns

Use | to search for multiple patterns

# Strings to search and the state codes to look for.
mainstring    <- c(
    "I was born in MN"
  , "I currently live in CA"
  , "I have visited WA, but it was a long time ago"
)
patternstring <- c("MN","CA","WA")

# Keep only strings that END with one of the state codes: paste0() appends
# "$" to each code and collapse = "|" joins them into "MN$|CA$|WA$".
searchstring <- paste0(patternstring, "$", collapse = "|") 
# value = TRUE returns the matching strings rather than their positions.
grep(searchstring, mainstring, value = TRUE) 
[1] "I was born in MN"       "I currently live in CA"

9.3 Match Start/End, Ignore Middle

pattern = ^start.*end$ (put .* in between ^start and end$)

Example: I want to pull all workbooks whose names start with “DATA” and that are csv files.

# Candidate file names; only those that start with "DATA" and end with the
# ".csv" extension should be kept.
files <- c(
    "DATA for Martha.xlsx" # exclude: not a csv file
  , "DATA_2022.csv"        # INCLUDE
  , "DATA_2021.csv"        # INCLUDE
  , "DATA_notes.csv"       # INCLUDE
  , "quick_crosstab.csv"   # exclude: does not start with DATA
  , "DATA weird file csv"  # exclude: no literal ".csv" extension
)

# ^DATA anchors the start, .* allows anything in between, and \\.csv$ requires
# a literal ".csv" at the very end. value = TRUE returns the matching strings
# directly, so there is no need to index files with the match positions.
grep(pattern = "^DATA.*\\.csv$", files, value = TRUE)
[1] "DATA_2022.csv"  "DATA_2021.csv"  "DATA_notes.csv"

9.4 Match Letters and White Space

# Labels followed by a number; the goal is to keep only the number.
ex_string  <- c("Mean 21.34", "Medium 20.50")

# sub() replaces only the FIRST match, so this removes the leading run of
# non-digit characters (\\D+: the letters and the space) and leaves the
# number, including its decimal point, untouched
sub("\\D+", '', ex_string)
[1] "21.34" "20.50"

9.5 Cheat Sheet (Table)

RegEx from Hypebright