Regex Pattern R Programming

Writing Regex Pattern to Play Regex Golf, .R File

In this sample R programming assignment, the expert solves a project on regex patterns. Working from the provided word lists, he writes regex patterns to play regex golf on each list. Each pattern is saved as a variable according to the specified condition, and all of the patterns are stored in a .R file. The output file reports the score obtained under the given constraints.

SOLUTION: –

#' Score a regex-golf attempt
#'
#' A string counts as "matched" only when `regex` produces exactly one match in
#' it and that match is the entire string. `NA` strings are treated as
#' unmatched.
#'
#' @param x Character vector of strings the pattern is supposed to match.
#' @param y Character vector of strings the pattern is supposed to leave
#'   unmatched.
#' @param regex Pattern handed to `str_extract_all()`; `nchar(regex)` is the
#'   base score.
#' @return Invisibly, a list with `score` (pattern length plus 10 for every
#'   unmatched `x` and every matched `y`), `matched_x`, `unmatched_x`,
#'   `matched_y` and `unmatched_y`.
regex_golf <- function(x, y, regex) {
  # Compare every string with its own extracted matches, element by element.
  # Writing `str_extract_all(s, regex) == s` instead compares a list with a
  # character vector, which makes R deparse each list element to text; a
  # string such as "character(0)" would then be reported as matched even
  # though the pattern found nothing in it.
  is_full_match <- function(strings) {
    hits <- str_extract_all(strings, regex)
    vapply(
      seq_along(strings),
      function(i) {
        length(hits[[i]]) == 1L && isTRUE(hits[[i]] == strings[[i]])
      },
      logical(1)
    )
  }

  xmatch <- is_full_match(x)
  ymatch <- is_full_match(y)

  # Ten penalty points for each x that was missed and each y that was hit.
  penalty <- 10 * sum(!xmatch, ymatch)
  score <- nchar(regex) + penalty

  invisible(list(
    score = score,
    matched_x = x[xmatch],
    unmatched_x = x[!xmatch],
    matched_y = y[ymatch],
    unmatched_y = y[!ymatch]
  ))
}

#' Sample artist names from a Billboard year-end chart page
#'
#' Downloads `http://billboardtop100of.com/<year>-2/`, takes the `X2` column of
#' the first HTML table on the page, de-duplicates it and draws `n` entries at
#' random (so the result depends on the current RNG state).
#'
#' @param year Year inserted into the page URL.
#' @param n Number of entries to sample; `sample()` errors if the page yields
#'   fewer than `n` unique entries.
#' @return An unnamed character vector of length `n`, normalised with
#'   `utf8_normalize(map_quote = TRUE)`.
get_artists <- function(year, n = 10) {
  page <- read_html(paste0("http://billboardtop100of.com/", year, "-2/"))

  artists <- page %>%
    html_table() %>%
    `[[`(1) %>%
    select(X2) %>%
    unlist() %>%
    unname() %>%
    unique() %>%
    # NOTE(review): the published listing had its quote characters mangled;
    # this is reconstructed as "left curly quote -> ASCII apostrophe". Confirm
    # against the original script. The utf8_normalize() call below maps curly
    # single quotes to ASCII as well.
    str_replace_all("\u2018", "'")

  picked <- artists[sample(length(artists), n)]

  unname(vapply(
    picked,
    utf8_normalize,
    map_quote = TRUE,
    FUN.VALUE = character(1)
  ))
}

# Fix the RNG seed so the sampled artist lists are reproducible.
set.seed(104840861)
x <- get_artists(2010, n = 22)
y <- get_artists(1998, n = 15)

# Part 1 ----
# Patterns are written as plain ASCII-quoted strings; typographic quotes are
# not valid string delimiters in R.

# problem 1 (Warmup)
pat_1_1 <- "\\w{1,}oo\\w{0,}"

# problem 2 (Anchors)
pat_1_2 <- "\\w{1,}ick"

# problem 3 (Ranges)
pat_1_3 <- "^([a-f])[^g-z]\\w{0,}"

# problem 4 (Backrefs)
pat_1_4 <- "\\b(\\S+?)\\1\\S*\\b"

# problem 6 (A man, a plan)
pat_1_6 <- "^(\\w(?!p)).*\\1$"

# problem 11 (Glob)
pat_1_11 <- "^(\\*?)(\\w*)(\\*?)(\\w*)(\\*?)(\\w*) .* ((?!\\1).+|\\1)\\2((?!\\3).+|\\3)\\4((?!\\5).+|\\5)\\6$"

# problem 7 (Prime)
pat_1_7 <- "(.)(.\\1){3}"

# problem 10 (Triples)
pat_1_10 <- "[02-5][123][257]|[07][0169]+3?$|55"

# Part 2 ----
pat_2 <- "[:graph:]*[:space:]*[:graph:]*[:space:]*feat\\.*[:space:]*[:graph:]*[:space:]*[:graph:]*"

---
title: "Homework 4"
author: "Abhishek Sinha"
output: pdf_document
---

```{r message=FALSE, warning=FALSE}
# Packages used by regex_golf() and get_artists().
library(stringr)
library(tidyverse)
library(rvest)
library(utf8)

# Word lists for Part 1, then the function and pattern definitions.
load("wordlists.RData")
source("104840861_stats102a_hw4.R")
```

# Part 1

## Warmup

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Warmup <- regex_golf(wordlists$Warmup$x, wordlists$Warmup$y, regex(pat_1_1))
list(Warmup$unmatched_x, Warmup$matched_y, Warmup$score)
```

## Anchors

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Anchors <- regex_golf(wordlists$Anchors$x, wordlists$Anchors$y, regex(pat_1_2))
list(Anchors$unmatched_x, Anchors$matched_y, Anchors$score)
```

## Ranges

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Ranges <- regex_golf(wordlists$Ranges$x, wordlists$Ranges$y, regex(pat_1_3))
list(Ranges$unmatched_x, Ranges$matched_y, Ranges$score)
```

## Backrefs

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Backrefs <- regex_golf(
  wordlists$Backrefs$x,
  wordlists$Backrefs$y,
  regex(pat_1_4)
)
list(Backrefs$unmatched_x, Backrefs$matched_y, Backrefs$score)
```

## A man, A plan

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
A_man_a_plan <- regex_golf(
  wordlists$`A man, a plan`$x,
  wordlists$`A man, a plan`$y,
  regex(pat_1_6)
)
list(A_man_a_plan$unmatched_x, A_man_a_plan$matched_y, A_man_a_plan$score)
```

## Prime

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Prime <- regex_golf(wordlists$Prime$x, wordlists$Prime$y, regex(pat_1_7))
list(Prime$unmatched_x, Prime$matched_y, Prime$score)
```

## Triples

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Triples <- regex_golf(
  wordlists$Triples$x,
  wordlists$Triples$y,
  regex(pat_1_10)
)
list(Triples$unmatched_x, Triples$matched_y, Triples$score)
```

## Glob

```{r}
# Show the x words the pattern missed, the y words it matched, and the score.
Glob <- regex_golf(wordlists$Glob$x, wordlists$Glob$y, regex(pat_1_11))
list(Glob$unmatched_x, Glob$matched_y, Glob$score)
```

# Part 2

![billboard.](billboard.jpg)

```{r}
# Re-seed before sampling so the artist lists drawn in this chunk are
# reproducible.
set.seed(104840861)
x <- get_artists(2010, n = 22)
y <- get_artists(1998, n = 15)

# Show the x artists the pattern missed, the y artists it matched, and the
# score.
Billboard <- regex_golf(x, y, regex(pat_2))
list(Billboard$unmatched_x, Billboard$matched_y, Billboard$score)
```