regex_sandbox.py (Source)

	`r"""`
	`Python Regular Expression Quick Guide`

	`^ Matches the beginning of a line`
	`$ Matches the end of the line`
	`. Matches any character`
	`\s Matches whitespace`
	`\S Matches any non-whitespace character`
	`* Repeats a character zero or more times`
	`*? Repeats a character zero or more times`
	`(non-greedy)`
	`+ Repeats a character one or more times`
	`+? Repeats a character one or more times`
	`(non-greedy)`
	`[aeiou] Matches a single character in the listed set`
	`[^XYZ] Matches a single character not in the listed set`
	`[a-z0-9] The set of characters can include a range`
	`( Indicates where string extraction is to start`
	`) Indicates where string extraction is to end`
	`"""`

	`import re`

	`text_line = 'My 2 favorite numbers are 19 and 42'`
	`# + means 'one or more times'`
	`found_numbers = re.findall(r'[0-9]+', text_line)`
	`absent_substring = re.findall(r'[AEIOU]', text_line)`
	`print(found_numbers)`
	`print(absent_substring)`

	`# Warning: Greedy Matching`
	`# The repeat characters (* and +) push outward in both directions`
	`# (greedy) to match the largest possible string`
	`text_line_2 = 'From: Using the : character'`
	`found_numbers_2 = re.findall(r'^F.+:', text_line_2)`
	`print(found_numbers_2) # -> ['From: Using the :']`

	`# Non-Greedy Matching`
	`# Not all regular expression repeat codes are greedy!`
	`# If you add a "?" character, the + and * chill out a bit...`
	`found_numbers_2 = re.findall(r'^F.+?:', text_line_2)`
	`print(found_numbers_2) # -> ['From:']`

	`# Fine-Tuning String Extraction`
	`# \S+ - at least one non-whitespace character`
	`str_with_email = "From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008"`
	`found_numbers_3 = re.findall(r'\S+@\S+', str_with_email)`
	`print(found_numbers_3) # -> ['stephen.marquard@uct.ac.za']`

	`# Parentheses are not part of the match - but they tell where`
	`# to start and stop what string to extract`
	`found_numbers_4 = re.findall(r'^From (\S+@\S+)', str_with_email)`
	`print('found_numbers_4:', found_numbers_4)`

	`# [^ ] - means "everything except blank"`
	`found_numbers_5 = re.findall(r'@([^ ]*)', str_with_email)`
	`print('found_numbers_5:', found_numbers_5) # -> ['uct.ac.za']`

	`# even cooler/refined regex version: extract only from the line which starts with`
	`# "From ..."`
	`# like "if" statement`
	`found_numbers_6 = re.findall(r'^From .@([^ ])', str_with_email)`
	`print('found_numbers_6:', found_numbers_6) # -> ['uct.ac.za']`

	`# Escape Character`
	`cookies_str = 'We just received $10.00 for cookies.'`
	`cookies_match_all = re.findall(r'\$[0-9.]+', cookies_str)`
	`print('cookies_match_all', cookies_match_all) # ['$10.00']`