Text.Regex (regex-posix) does NOT support
word boundaries.
Use the Text.RE.TDFA.String module instead.
Text.Regex.Posix and Text.Regex.Posix.String wrap the C POSIX-2 Regex API.
They do not support multi-byte (Unicode) characters.
Use Text.Regex.TDFA and Text.Regex.TDFA.Text instead.
-- /Users/cat/myfile/bitbucket/haskell/regexExample.hs
Fix the regex Unicode error by using Text.Regex.TDFA and Text.Regex.TDFA.Text
> :m -Text.Regex.Posix
> :m +Text.Regex.TDFA
> :m +Text.Regex.TDFA.Text
> st = "iabbr vodiv: ⨸"
> splitRegex (mkRegex ":") st
["iabbr vodiv"," \10808"]
>
Search and replace a word on word boundaries
--{-# LANGUAGE QuasiQuotes #-}
--import Text.RE.TDFA.String
main = do
-- replace word boundary, replace one, and replace all
let s1 = "mydog dog dog (dog)" ?=~/ [ed|${adr}(\):?///< div class="dog">${adr}< /div> |]
let s2 = "mydog dog dog (dog)" *=~/ [ed|${adr}(\):?///< div class="dog">${adr}< /div> |]
putStrLn $ "replace one: =>[" ++ s1 ++ "]"
putStrLn $ "replace all: =>[" ++ s2 ++ "]"
Use =~
re1 = "my food" =~ "foo"::Bool -- True
re2 = "my food foo" =~ "foo"::String -- "foo"
re3 = "my foolish fool" =~ "foo."::[[String]] -- [["fool"],["fool"]]
re4 = "my foolish fool" =~ "foo."::(String, String, String) -- ("my ","fool","ish fool")
"mydog dog-- dog pig cat dog fox" => "mydog [dog]-- [dog] pig cat [dog] fox"
-- word boundary, search word and replace
let s = "mydog dog-- dog pig cat dog fox"
searchReplace s word "[\\0]"
"http://google.com" => http://[google].com
let r1 = mkRegex "google"
let input = "http://google.com"
let replace1 = "[\\0]"
putStrLn $ subRegex r1 input replace1 -- http://[google].com
"banana" => ba[nana] {na}
let r3 = mkRegex "(na)+"
putStrLn $ subRegex r3 "banana" "[\\0] {\\1}"
"http://file.pdf" => [http://file.pdf]
let r5 = mkRegex ".*\\.pdf"
putStrLn $ subRegex r5 "http://file.pdf" "[\\0]" -- [http://file.pdf]
Haskell Capture Group in Regex
1. Split an email address into two parts at the '@' sign
"root_admin.last-name@gmail.com" => [root_admin.last-name@gmail.com] {root_admin.last-name} {gmail.com}
-- capture group, capture email address
let rgx = mkRegex "([A-Za-z._-]+)+@(([a-z])+\\.([a-z]{2,4}))"
↑
+ - - - - - - - + Input Str whole str fst group snd group
↓ ↓ ↓ ↓ ↓
putStrLn $ subRegex rgx "root_admin.last-name@gmail.com" "7. [\\0] {\\1} {\\2}"
"/home/user/file/mybook.PDF" => "/home/user/file/mybook.PDF"
let r7 = mkRegexWithOpts "(.*\\.pdf)" True False
let input = "/home/user/file/mybook.PDF"
let callMeMaybe = matchRegex r7 input
"/home/user/file/mybook.PDF" => ".PDF"
let r7 = mkRegexWithOpts "(\\.pdf)" True False
let input = "/home/user/file/mybook.PDF"
let callMeMaybe = matchRegex r7 input
"779-123-4483" => "[779-123-4483]"
-- capture phone number
let p1 = mkRegex "[0-9]{3}[ -]?[0-9]{3}[ -]?[0-9]{4}"
-- replace string
putStrLn $ subRegex p1 "779-123-4483" "[\\0]" -- [779-123-4483]
"PaloAlto:MountainView" => ["PaloAlto","MountainView"]
-- split Regex
print $ splitRegex(mkRegex ":") "PaloAlto:MountainView" -- ["PaloAlto","MountainView"]
Text.Regex.Posix.String died: illegal byte sequence
-- split Regex, ERROR: illegal byte sequence
print $ splitRegex(mkRegex ":") "iabbr vRightarrow ⟹ "
Capture a method declaration in Objective-C
-- capture function in ObjectiveC
let me = mkRegex "(-|\\+)[[:space:]]*\\([[:space:]]*[[:print:]]+[[:space:]]*\\)[[:space:]]*[[:graph:]]+\
\[[:space:]]*:?[[:space:]]*([[:space:]]*[[:graph:]]+[[:space:]]*:[[:space:]]*[[:graph:]]+[[:space:]]*)*"
putStrLn $ subRegex me "- (NSstring*) swap :" "[\\0]" -- [- (NSstring*) swap :]
capture function in ObjectiveC
putStrLn $ subRegex me "- (NSstring*) method:(Integer)num width:(Integer)width\
\ height:(NSString*)str" "12. [\\0]"
-- [- (NSstring*) method:(Integer)num width:(Integer)width height:(NSString*)str]
match count
let r7 = mkRegex "([A-Za-z]+)"
let count = matchCount r7 "haskell java C++ ObejctiveC aa#"
putStrLn $ "count = " ++ show count
match test
let r8 = mkRegex "[a-z]+"
let bo = matchTest r8 "22323"
putStrLn $ "Is matched = " ++ show bo -- prints False: "22323" contains no lowercase letter for [a-z]+ to match