# Regex and Parsing

{% embed url="<https://www.hackerrank.com/domains/python?badge_type=python&filters%5Bsubdomains%5D%5B%5D=py-regex>" %}

## Detect Floating Point Number

<figure><img src="/files/qLSCv4g3DPE2O1A4k3S0" alt=""><figcaption></figcaption></figure>

```python
import re

# The first line gives the number of test cases.  For each following line,
# print True when it is an optionally signed number with a decimal point and
# at least one digit after it (digits before the point are optional).
case_count = int(input())
for _ in range(case_count):
    candidate = input()
    print(re.match(r'^[+-]?\d*\.\d+$', candidate) is not None)
```

***

## Re.split()

<figure><img src="/files/HgMdCW6384ksrTqBzG1G" alt=""><figcaption></figcaption></figure>

```python
# Character class matching one comma or one dot: the separators to split on.
regex_pattern = r'[,.]'	# Do not delete 'r'.

import re

# Print every piece produced by the split on its own line.
for piece in re.split(regex_pattern, input()):
    print(piece)
```

***

## Group(), Groups() & Groupdict()

<figure><img src="/files/OJ42lzBwHiUbNsSgMELd" alt=""><figcaption></figcaption></figure>

```python
import re

# The lazy prefix makes the capture group settle on the leftmost alphanumeric
# character that is immediately followed by a copy of itself.
first_repeat = re.match(r".*?([a-zA-Z\d])\1.*", input())

# Print that character, or -1 when the input has no such adjacent pair.
print(first_repeat.group(1) if first_repeat else -1)

```

***

## Re.findall() & Re.finditer()

<figure><img src="/files/woCKRJjv0lOFzB8PC6RI" alt=""><figcaption></figcaption></figure>

```python
import re

# A delimiter must be an English consonant.  Using "anything that is not a
# vowel" would let digits, spaces and symbols act as delimiters, so the
# consonants are spelled out explicitly (letter ranges with a, e, i, o, u
# removed, in both cases).
_VOWEL_RUN = re.compile(
    r"(?<=[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z])"
    r"([aeiouAEIOU]{2,})"
    r"(?=[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z])"
)


def find_vowel_runs(text):
    """Return every run of two or more vowels enclosed by consonants.

    The enclosing consonants are checked with lookarounds, so they are not
    consumed and one consonant can close a run and open the next one.

    Args:
        text: The string to scan.

    Returns:
        The matching vowel runs in order of appearance; an empty list when
        there are none.
    """
    return _VOWEL_RUN.findall(text)


if __name__ == "__main__":
    matches = find_vowel_runs(input())

    # One run per line, or -1 when nothing qualified.
    if matches:
        print("\n".join(matches))
    else:
        print(-1)
```

***

## Re.start() & Re.end()

<figure><img src="/files/Qek5w2aTp32GwKXBQzRV" alt=""><figcaption></figcaption></figure>

```python
import re

s = input()
k = input()

# k is interpolated into the pattern as-is.  Wrapping it in a zero-width
# lookahead lets overlapping occurrences be found; capture group 1 carries
# the real extent of each occurrence.
spans = [(hit.start(1), hit.end(1) - 1) for hit in re.finditer(rf"(?=({k}))", s)]

# Print each inclusive (start, end) pair, or (-1, -1) when k never occurs.
for span in spans or [(-1, -1)]:
    print(span)
```

***

## Validating Roman Numerals

<figure><img src="/files/NJSYLyejsViMZdbZaiWz" alt=""><figcaption></figcaption></figure>

```python
# Thousands (M{0,3}), then hundreds, tens and units, each written either in
# subtractive form (CM/CD, XC/XL, IX/IV) or additive form (D?C{0,3}, etc.).
# Every part can match nothing on its own, so the leading (?=.) lookahead
# requires at least one character; without it the empty string would pass.
regex_pattern = r"^(?=.)M{0,3}(C[MD]|D?C{0,3})(X[CL]|L?X{0,3})(I[XV]|V?I{0,3})$"

import re

# Print "True" when the line read from stdin matches regex_pattern, else "False".
is_match = re.match(regex_pattern, input()) is not None
print(str(is_match))
```

***

## Validating Phone Numbers

<figure><img src="/files/mBj2CWTktnP5TKXdPmgF" alt=""><figcaption></figcaption></figure>

```python
import re

# Compiled once because every test case is checked against the same rule:
# a leading 7, 8 or 9 followed by nine more digits and nothing else.
mobile_number = re.compile(r"^[789]\d{9}$")

for _ in range(int(input())):
    print("YES" if mobile_number.match(input()) else "NO")
```

***

## Validating and Parsing Email Addresses

<figure><img src="/files/RzNWYJQtBnsXgkpv31T4" alt=""><figcaption></figcaption></figure>

<figure><img src="/files/nOmkooISK4COR2V2DNtK" alt=""><figcaption></figcaption></figure>

```python
import re
import email.utils

# user part: a letter, then any mix of word characters, '-' and '.';
# domain: letters only; extension: one to three letters.
address_pattern = re.compile(r"^[a-zA-Z][\w\-\.]*@[a-zA-Z]+\.[a-zA-Z]{1,3}$")

for _ in range(int(input())):
    # parseaddr splits the input line into a (name, address) pair.
    name_and_address = email.utils.parseaddr(input())
    if address_pattern.match(name_and_address[1]) is None:
        continue
    # Only pairs whose address passed the check are printed, re-formatted.
    print(email.utils.formataddr(name_and_address))
```

***

## Hex Color Code

<figure><img src="/files/zUCa9hcGrACIBVqZULoJ" alt=""><figcaption></figcaption></figure>

<figure><img src="/files/dMBSCPyWBeoZDxWKzWJS" alt=""><figcaption></figcaption></figure>

```python
import re
import sys

# A colour code is '#' followed by exactly 6 or exactly 3 hex digits.  It is
# only accepted right after a space, comma or colon, and the trailing
# negative lookahead rejects tokens with extra hex digits: without it a
# malformed value such as "#12345" would be reported as "#123".
_HEX_COLOR = re.compile(
    r"[ ,:](#(?:[0-9a-fA-F]{6}|[0-9a-fA-F]{3}))(?![0-9a-fA-F])"
)


def find_hex_colors(css):
    """Return the hex colour codes found in *css*, in order of appearance.

    Args:
        css: The stylesheet text to scan.

    Returns:
        A list of strings such as "#fff" or "#FfFdF8", each including the
        leading '#'; an empty list when nothing matches.
    """
    return _HEX_COLOR.findall(css)


if __name__ == "__main__":
    for each in find_hex_colors(sys.stdin.read()):
        print(each)
```

***

## HTML Parser - Part 1

<figure><img src="/files/S3RFvH5XpdywEUogDUUG" alt=""><figcaption></figcaption></figure>

<figure><img src="/files/cHfqwyV9d46eZ1DcSwYp" alt=""><figcaption></figcaption></figure>

```python
from html.parser import HTMLParser
import sys

class MyHTMLParser(HTMLParser):
    """Print each start, end and empty tag, followed by its attributes."""

    def _report(self, kind, tag, attrs=()):
        """Print the "<kind> : <tag>" line, then one line per attribute.

        ``kind`` is left-justified to five characters so the colons line up
        ("Start :", "End   :", "Empty :").  Each attribute is printed as
        "-> name > value".
        """
        print(f"{kind:<5} : {tag}")
        for name, value in attrs:
            print(f"-> {name} > {value}")

    def handle_starttag(self, tag, attrs):
        self._report("Start", tag, attrs)

    def handle_endtag(self, tag):
        self._report("End", tag)

    def handle_startendtag(self, tag, attrs):
        self._report("Empty", tag, attrs)


def parse_html(html):
    """Run *html* through MyHTMLParser, printing the tag report to stdout."""
    parser = MyHTMLParser()
    parser.feed(html)
    # close() forces processing of anything feed() left buffered, as if the
    # input were followed by an end-of-file mark.
    parser.close()


if __name__ == "__main__":
    parse_html(sys.stdin.read())
```

***

## HTML Parser - Part 2

<figure><img src="/files/qjZjJNqg2zcB9F9xN8nX" alt=""><figcaption></figcaption></figure>

```python
from html.parser import HTMLParser
import sys

class MyHTMLParser(HTMLParser):
    """Print comments (labelled single- or multi-line) and text data."""

    def handle_comment(self, data):
        # A comment counts as multi-line as soon as it contains a newline.
        kind = "Multi-line" if "\n" in data else "Single-line"
        print(f">>> {kind} Comment")
        print(data)

    def handle_data(self, data):
        # A chunk that is exactly one newline is skipped; everything else is
        # printed verbatim.
        if data != "\n":
            print(">>> Data")
            print(data)


def parse_html(html):
    """Run *html* through MyHTMLParser, printing the report to stdout."""
    parser = MyHTMLParser()
    parser.feed(html)
    # feed() may hold back trailing text (for instance text ending in a bare
    # '&', which could be the start of a character reference).  close()
    # processes that remainder so the final data chunk is not lost.
    parser.close()


if __name__ == "__main__":
    input()  # skip the first line; the rest of stdin is read in one go
    parse_html(sys.stdin.read())
```

***

## Detect HTML Tags, Attributes and Attribute Values

<figure><img src="/files/1fZCiZqE43YtON50xWiL" alt=""><figcaption></figcaption></figure>

```python
from html.parser import HTMLParser
import sys

class MyHTMLParser(HTMLParser):
    """Print every opening tag name followed by its attributes."""

    def handle_starttag(self, tag, attrs):
        print(tag)
        for name, value in attrs:
            print(f'-> {name} > {value}')

    # Self-closing tags are reported exactly like ordinary start tags, so the
    # same handler is reused instead of keeping a second copy of its body.
    handle_startendtag = handle_starttag


def parse_html(html):
    """Run *html* through MyHTMLParser, printing tags and attributes."""
    parser = MyHTMLParser()
    parser.feed(html)
    # close() forces processing of anything feed() left buffered, as if the
    # input were followed by an end-of-file mark.
    parser.close()


if __name__ == "__main__":
    parse_html(sys.stdin.read())
```

***

## Validating UID

<figure><img src="/files/yVq5TKJFhPzE5kgvfvCG" alt=""><figcaption></figcaption></figure>

```python
import re

# A single pattern carries all the rules:
#   (?!.*(.).*\1)        no character occurs twice
#   (?=.*[A-Z].*[A-Z])   at least two uppercase letters
#   (?=.*\d.*\d.*\d)     at least three digits
#   [A-Za-z0-9]{10}      exactly ten alphanumeric characters
uid_pattern = re.compile(r"^(?!.*(.).*\1)(?=.*[A-Z].*[A-Z])(?=.*\d.*\d.*\d)[A-Za-z0-9]{10}$")

for _ in range(int(input())):
    print("Valid" if uid_pattern.match(input()) else "Invalid")
```

***

## Regex Substitution

<figure><img src="/files/Xy3Z9hFFkmCuefXezzP4" alt=""><figcaption></figcaption></figure>

```python
import re
import sys

# The first line holds a count; it is parsed but not used afterwards because
# the remainder of stdin is consumed in a single read.
line_count = int(input())
source_text = sys.stdin.read()

# Only operators with a space on both sides are rewritten.  The lookarounds
# leave those spaces unconsumed, so back-to-back operators ("&& &&") are all
# replaced in one pass.
with_and = re.sub(r'(?<= )&&(?= )', 'and', source_text)
with_and_or = re.sub(r'(?<= )\|\|(?= )', 'or', with_and)
print(with_and_or)
```

***

## Validating Credit Card Numbers

<figure><img src="/files/yTgmp8hsvl7khmqYw3QV" alt=""><figcaption></figcaption></figure>

<figure><img src="/files/vopNXgfMTcuM57jPQvfM" alt=""><figcaption></figcaption></figure>

```python
import re

# Layout: a leading 4, 5 or 6 and sixteen digits in total, written as four
# groups of four.  The separator between groups is captured once and reused
# through the backreference, so the groups are either all joined or all
# split by a single '-'.
_CARD_LAYOUT = re.compile(r"[456][0-9]{3}(-?)[0-9]{4}\1[0-9]{4}\1[0-9]{4}")

# Any digit immediately followed by three more copies of itself.
_FOUR_IN_A_ROW = re.compile(r"([0-9])\1{3}")


def is_valid_card(number):
    """Return True when *number* is a well-formed card number.

    The repeat check runs on the digits with hyphens removed, so a run of
    four equal digits is caught wherever it starts, including runs that
    straddle a group boundary.

    Args:
        number: The candidate card number as typed, with or without hyphens.

    Returns:
        True if the layout is valid and no digit appears four or more times
        in a row; False otherwise.
    """
    if _CARD_LAYOUT.fullmatch(number) is None:
        return False
    return _FOUR_IN_A_ROW.search(number.replace("-", "")) is None


if __name__ == "__main__":
    for _ in range(int(input())):
        print('Valid' if is_valid_card(input()) else 'Invalid')
```

***

## Validating Postal Code

<figure><img src="/files/CaUihmVhUrVr8OQPoZNW" alt=""><figcaption></figcaption></figure>

<figure><img src="/files/DTGywXeggj0ZoB1NBY0y" alt=""><figcaption></figcaption></figure>

```python
# Use PyPy3
# Six digits with a non-zero first digit, i.e. 100000 through 999999.
regex_integer_in_range = r'^[1-9]\d{5}$'	# Do not delete 'r'.
# A digit whose copy sits two positions later.  The lookahead keeps the
# match zero-width past the first digit, so overlapping pairs are all found.
regex_alternating_repetitive_digit_pair = r'(\d)(?=\d\1)'	# Do not delete 'r'.


import re
P = input()

# Valid means: in range, and fewer than two alternating repetitive pairs.
in_range = bool(re.match(regex_integer_in_range, P))
alternating_pairs = re.findall(regex_alternating_repetitive_digit_pair, P)
print(in_range and len(alternating_pairs) < 2)
```

***

## Matrix Script

<figure><img src="/files/8doL0dSi1qqXSeard7OO" alt=""><figcaption></figcaption></figure>

<figure><img src="/files/FZsbSerJCfCJP4RvzsGH" alt=""><figcaption></figcaption></figure>

```python
import re

# First line: row count and column count (only the row count is needed to
# know how many lines to read).
row_count, column_count = map(int, input().split())

rows = [input() for _ in range(row_count)]

# Transposing with zip reads the grid column by column, top to bottom.
decoded = "".join("".join(column) for column in zip(*rows))

# Collapse every run of non-word characters that has a word character on
# both sides into a single space; runs at either end are left untouched.
print(re.sub(r'\b\W+\b', ' ', decoded))
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://thamizhiniyancs.gitbook.io/writeups/hackerrank/python/regex-and-parsing.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
