Regex / Regular expressions and XPath alternatives every SEO needs

Search within the URL with regex

https?:\/\/(.*?)\/
// Extract the protocol + host prefix (up to and including the first "/"
// after the host) from a URL.
const input = "https://www.blick.ch/sport/";
// No "g" flag: a global regex keeps `lastIndex` between exec() calls, so
// running it a second time on the same string would return null.
const regex = /https?:\/\/(.*?)\//;
// Index 0 of the match array is the full matched text.
const [output] = regex.exec(input);
console.log(output);
// https://www.blick.ch/
// Extract only the host name (capture group 1) from a URL.
const input = "https://www.blick.ch/sport/";
// No "g" flag: a global regex keeps `lastIndex` between exec() calls, so
// running it a second time on the same string would return null.
const regex = /https?:\/\/(.*?)\//;
// Skip index 0 (full match) and take capture group 1: the host.
const [, output] = regex.exec(input);
console.log(output);
// www.blick.ch
([^\/]+[^\/]|[^\/]+[\/])$
// Extract the last path segment of a URL (keeping a trailing "/" if present).
const input = "https://www.blick.ch/sport/formel1/neues-reglement-muss-wieder-warten-schwache-fuehrung-legt-formel-1-lahm-id15375623.html";
// Regex literal instead of RegExp("..."): inside a string "\/" is just "/",
// so the escapes were silently dropped. No "g" flag, so the regex stays
// stateless and can be reused.
const regex = /([^\/]+[^\/]|[^\/]+[\/])$/;
// Index 0 of the match array is the full matched text.
const [output] = regex.exec(input);
console.log(output);
// neues-reglement-muss-wieder-warten-schwache-fuehrung-legt-formel-1-lahm-id15375623.html
-id([0-9]+)\.html
REGEXEXTRACT(A2,"(?:.*id)([0-9]+)(?:.html)")
// Extract the numeric article id from a URL ending in "-id<digits>.html".
const input = "https://www.blick.ch/sport/formel1/neues-reglement-muss-wieder-warten-schwache-fuehrung-legt-formel-1-lahm-id15375623.html";
// Regex literal instead of RegExp("-id([0-9]+)\.html"): in a string literal
// "\." collapses to ".", which turned the dot into "any character" and let
// inputs such as "-id123xhtml" match. No "g" flag, so the regex is stateless.
const regex = /-id([0-9]+)\.html/;
// Skip index 0 (full match) and take capture group 1: the digits of the id.
const [, output] = regex.exec(input);
console.log(output);
// 15375623
.*-id[0-9].*
// Pull the numeric id out of an article URL of the form "...-id<digits>.html".
const input = "https://www.blick.ch/sport/formel1/neues-reglement-muss-wieder-warten-schwache-fuehrung-legt-formel-1-lahm-id15375623.html";
// A regex literal keeps "\." as an escaped (literal) dot; the string form
// RegExp("...\.html") loses the backslash and matches any character there.
// The "g" flag is omitted so exec() does not carry `lastIndex` state.
const regex = /-id([0-9]+)\.html/;
const match = regex.exec(input);
// Capture group 1 holds the digits that follow "-id".
const output = match[1];
console.log(output);
// 15375623
#(.+)
// Extract the fragment (everything after "#") from a URL.
const input = "https://zrce.eu#test";
// No "g" flag: a global regex keeps `lastIndex` between exec() calls, so
// running it a second time on the same string would return null.
const regex = /#(.+)/;
// Skip index 0 (full match, including "#") and take capture group 1.
const [, output] = regex.exec(input);
console.log(output);
// test
http:\/
https:\/
// Check whether a URL uses the https protocol; prints the raw match array
// (or null when the URL does not contain "https:/").
const input = "https://zrce.eu#test";
// No "g" flag: a global regex keeps `lastIndex` between exec() calls, so
// running it a second time on the same string would return null.
const regex = /https:\//;
const output = regex.exec(input);
console.log(output);
// [ 'https:/',
// index: 0,
// input: 'https://zrce.eu#test',
// groups: undefined ]
http[s]?:\/\/(.*?)\..*\/
http[s]?:\/\/.*?\/(.*?)\/
http[s]?:\/\/.*?\/.*?\/(.*?)\/
(\?|\&)([^=\n]+)\=([^&\n]+)

Search within the HTML with regex + XPath alternatives

type="application\/ld\+json">?([^<]*)
//script[@type="application/ld+json"]
(?i)name\s*=\s*['"]?news_keywords[^>]+content\s*=\s*['"]?([^'"]*)['"]?|content\s*=\s*['"]?([^"']*)['"]?[^>]+name\s*=\s*['"]?news_keywords['"]?
//meta[@name='news_keywords']/@content
(?i)hreflang\s*=\s*['"]?de-de[^>]+href\s*=\s*['"]?([^'"]*)['"]?|href\s*=\s*['"]?([^"']*)['"]?[^>]+hreflang\s*=\s*['"]?de-de['"]?

Find RSS Feed URL with regex

(?i)type\s*=\s*['"]?application\/rss\+xml[^>]+href\s*=\s*['"]?([^'"]*)['"]?|href\s*=\s*['"]?([^"']*)['"]?[^>]+type\s*=\s*['"]?application\/rss\+xml['"]?
(?i)name\s*=\s*['"]?robots[^>]+content\s*=\s*['"]?([^'"]*)['"]?|content\s*=\s*['"]?([^"']*)['"]?[^>]+name\s*=\s*['"]?robots['"]?
(?i)name\s*=\s*['"]?date[^>]+content\s*=\s*['"]?([^'"]*)['"]?|content\s*=\s*['"]?([^"']*)['"]?[^>]+name\s*=\s*['"]?date['"]?
//meta[@name='date']/@content
UA-?([^']*)
GTM-?([^']*)
(?i)name\s*=\s*['"]?google-site-verification[^>]+content\s*=\s*['"]?([^'"]*)['"]?
//meta[@name='google-site-verification']/@content
(?i)rel\s*=\s*['"]?amphtml[^>]+href\s*=\s*['"]?([^'"]*)['"]?|href\s*=\s*['"]?([^"']*)['"]?[^>]+rel\s*=\s*['"]?amphtml['"]?
//link[@rel='amphtml']/@href
(\w+\-).*\1
<div class="ArticleMetadata__Wrapper-sc-1xm0v61-0 dKWmkV article-metadata">

Anything missing?

Any option to make the rules above more efficient and nice?

Used tools

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store