Writing Regex Pattern to Play Regex Golf, .R File
In this sample R programming assignment, the expert solves a project on regex patterns. The required word lists are provided first; the expert then writes regex patterns to play Regex Golf on those lists. Each pattern is saved as a variable following a specified naming convention, and the patterns are stored in a .R file. In the output file, the score is reported under the given constraints.
SOLUTION: –
#' Score a regex-golf attempt.
#'
#' A string counts as matched only when `regex` produces exactly one match in
#' it and that match is the entire string. The score is the pattern length
#' (`nchar(regex)`) plus 10 points for every element of `x` that is not
#' matched and every element of `y` that is matched, so lower is better.
#'
#' @param x Character vector of strings the pattern is supposed to match.
#' @param y Character vector of strings the pattern is supposed to reject.
#' @param regex Pattern handed to `str_extract_all()`.
#' @return Invisibly, a list with elements `score`, `matched_x`,
#'   `unmatched_x`, `matched_y` and `unmatched_y`.
regex_golf <- function(x, y, regex) {
  # Decide element by element whether the single extracted match equals the
  # whole input. Comparing the list from str_extract_all() to a character
  # vector with `==` would coerce each list element by deparsing it, so an
  # unmatched string that literally reads "character(0)" would be reported as
  # matched. NA inputs are treated as unmatched instead of turning the score
  # into NA.
  is_full_match <- function(strings) {
    hits <- str_extract_all(strings, regex)
    vapply(
      seq_along(strings),
      function(i) {
        hit <- hits[[i]]
        length(hit) == 1L && !is.na(hit) && hit == strings[[i]]
      },
      logical(1)
    )
  }

  xmatch <- is_full_match(x)
  ymatch <- is_full_match(y)

  # Ten points for each missed target and each false positive.
  penalty <- 10 * sum(!xmatch, ymatch)
  score <- nchar(regex) + penalty

  invisible(list(
    score = score,
    matched_x = x[xmatch],
    unmatched_x = x[!xmatch],
    matched_y = y[ymatch],
    unmatched_y = y[!ymatch]
  ))
}
#' Sample entries from a Billboard year-end chart page.
#'
#' Reads `http://billboardtop100of.com/<year>-2/`, takes column `X2` of the
#' first HTML table on the page (presumably the artist column; confirm
#' against the live page), removes duplicates and draws `n` of the remaining
#' values with `sample()`.
#'
#' @param year Year inserted into the page URL.
#' @param n Number of values to draw; defaults to 10. `sample()` errors if
#'   `n` exceeds the number of unique values.
#' @return An unnamed character vector of length `n`, passed through
#'   `utf8_normalize(map_quote = TRUE)`.
get_artists <- function(year, n = 10) {
  page <- read_html(paste0("http://billboardtop100of.com/", year, "-2/"))

  artists <- page %>%
    html_table() %>%
    `[[`(1) %>%
    select(X2) %>%
    unlist() %>%
    unname() %>%
    unique() %>%
    # The published listing showed the same typographic quote as both pattern
    # and replacement (a no-op); replacing curly single quotes with a straight
    # apostrophe is the presumed intent. utf8_normalize(map_quote = TRUE)
    # below performs the same mapping, so the returned values do not depend
    # on this step.
    str_replace_all("[\u2018\u2019]", "'")

  # The draw depends only on the number of unique values and the RNG state.
  artists <- artists[sample(length(artists), n)]
  unname(vapply(artists, utf8_normalize, map_quote = TRUE,
                FUN.VALUE = character(1)))
}
# Fix the RNG state so the two draws below are reproducible. `x` must be
# sampled before `y`, because both calls consume the same random stream.
set.seed(104840861)
x <- get_artists(year = 2010, n = 22)
y <- get_artists(year = 1998, n = 15)
# Regex-golf patterns, one variable per problem. The string contents are
# unchanged (pattern length feeds the score); only the typographic quote
# delimiters, which R cannot parse, are replaced with straight quotes.

# problem 1
pat_1_1 <- "\\w{1,}oo\\w{0,}"
# problem 2
pat_1_2 <- "\\w{1,}ick"
# problem 3
pat_1_3 <- "^([a-f])[^g-z]\\w{0,}"
# problem 4
pat_1_4 <- "\\b(\\S+?)\\1\\S*\\b"
# problem 6
pat_1_6 <- "^(\\w(?!p)).*\\1$"
# problem 11
pat_1_11 <- "^(\\*?)(\\w*)(\\*?)(\\w*)(\\*?)(\\w*) .* ((?!\\1).+|\\1)\\2((?!\\3).+|\\3)\\4((?!\\5).+|\\5)\\6$"
# problem 7
pat_1_7 <- "(.)(.\\1){3}"
# problem 10
pat_1_10 <- "[02-5][123][257]|[07][0169]+3?$|55"
# Part 2
pat_2 <- "[:graph:]*[:space:]*[:graph:]*[:space:]*feat\\.*[:space:]*[:graph:]*[:space:]*[:graph:]*"
---
title: "Homework 4"
author: "Abhishek Sinha"
output: pdf_document
---
```{r message=FALSE, warning=FALSE}
# Packages used by the sourced script and by the chunks below.
library(stringr)
library(tidyverse)
library(rvest)
library(utf8)
# Loads the saved workspace; the chunks below read `wordlists` from it.
load("wordlists.RData")
# Defines regex_golf(), get_artists() and the pat_* pattern variables.
source("104840861_stats102a_hw4.R")
```
# Part 1
## Warmup
```{r}
# Score pat_1_1 on the Warmup lists, then show the x strings it missed,
# the y strings it matched, and the score.
Warmup <- regex_golf(wordlists$Warmup$x, wordlists$Warmup$y, regex(pat_1_1))
list(Warmup$unmatched_x, Warmup$matched_y, Warmup$score)
```
## Anchors
```{r}
# Score pat_1_2 on the Anchors lists, then show the x strings it missed,
# the y strings it matched, and the score.
Anchors <- regex_golf(wordlists$Anchors$x, wordlists$Anchors$y, regex(pat_1_2))
list(Anchors$unmatched_x, Anchors$matched_y, Anchors$score)
```
## Ranges
```{r}
# Score pat_1_3 on the Ranges lists, then show the x strings it missed,
# the y strings it matched, and the score.
Ranges <- regex_golf(wordlists$Ranges$x, wordlists$Ranges$y, regex(pat_1_3))
list(Ranges$unmatched_x, Ranges$matched_y, Ranges$score)
```
## Backrefs
```{r}
# Score pat_1_4 on the Backrefs lists, then show the x strings it missed,
# the y strings it matched, and the score.
Backrefs <- regex_golf(
  wordlists$Backrefs$x,
  wordlists$Backrefs$y,
  regex(pat_1_4)
)
list(Backrefs$unmatched_x, Backrefs$matched_y, Backrefs$score)
```
## A man, A plan
```{r}
# Score pat_1_6 on the "A man, a plan" lists, then show the x strings it
# missed, the y strings it matched, and the score.
A_man_a_plan <- regex_golf(
  wordlists$`A man, a plan`$x,
  wordlists$`A man, a plan`$y,
  regex(pat_1_6)
)
list(A_man_a_plan$unmatched_x, A_man_a_plan$matched_y, A_man_a_plan$score)
```
## Prime
```{r}
# Score pat_1_7 on the Prime lists, then show the x strings it missed,
# the y strings it matched, and the score.
Prime <- regex_golf(wordlists$Prime$x, wordlists$Prime$y, regex(pat_1_7))
list(Prime$unmatched_x, Prime$matched_y, Prime$score)
```
## Triples
```{r}
# Score pat_1_10 on the Triples lists, then show the x strings it missed,
# the y strings it matched, and the score.
Triples <- regex_golf(wordlists$Triples$x, wordlists$Triples$y, regex(pat_1_10))
list(Triples$unmatched_x, Triples$matched_y, Triples$score)
```
## Glob
```{r}
# Score pat_1_11 on the Glob lists, then show the x strings it missed,
# the y strings it matched, and the score.
Glob <- regex_golf(wordlists$Glob$x, wordlists$Glob$y, regex(pat_1_11))
list(Glob$unmatched_x, Glob$matched_y, Glob$score)
```
# Part 2

```{r}
# Re-seed so the two samples are reproducible; x is drawn before y because
# both calls use the same random stream.
set.seed(104840861)
x <- get_artists(2010, n = 22)
y <- get_artists(1998, n = 15)
# Score pat_2 with the 2010 sample as targets and the 1998 sample as
# non-targets, then show the misses, the false positives and the score.
Billboard <- regex_golf(x, y, regex(pat_2))
list(Billboard$unmatched_x, Billboard$matched_y, Billboard$score)
```