go-regexp

宰父熙云

2023-12-01

golang Regexp主要提供如下正则所表示的16个方法：

Find(All)?(String)?(Submatch)?(Index)?

若带All，该方法返回一个所有递进匹配结果的slice；该方法需要额外传一个整数n，若n>=0，至多返回n个匹配或子匹配，若n<0，返回全部。

若带String，该方法传入的参数需是string，否则为字节slice，返回结果也为对应的string。

若带Submatch，该方法返回表达式递进的子匹配slice（子匹配匹配以括号扩起的表达式，也称作匹配组），该slice以左括号从左到右的顺序返回匹配结果，即第0个为匹配整个表达式的结果，第1个为匹配第一个左括号所表示表达式的结果，以此类推。

若带Index，匹配与子匹配使用字节位置索引对来标识，result[2n:2n+2]标识第n个子匹配的索引。

也有一些方法不属上述正则所表示的范围。

1 基础用法

接下来看一个简单的例子。如下代码，在使用前首先将正则表达式编译，然后对多组字符串判断是否匹配。

package main

import (
	"fmt"
	"regexp"
)

// p matches a run of lowercase letters followed by a bracketed decimal
// number, such as "larry[12]". It is compiled once at package scope.
var p = regexp.MustCompile(`^[a-z]+\[\d+\]$`)

// main prints, one per line, whether each sample string matches p.
func main() {
	for _, sample := range []string{"larry[12]", "jacky[12]", "linda[a12]"} {
		fmt.Println(p.MatchString(sample))
	}
}

若仅一次性简单判断字符串是否匹配，也可以不创建Regexp，直接调用regexp包函数。

package main

import (
	"fmt"
	"regexp"
)

// main runs two one-off matches through the package-level helpers, which
// compile the pattern on every call, and prints each (matched, err) pair.
func main() {
	wordFound, wordErr := regexp.Match(`\w+`, []byte("hello"))
	fmt.Println(wordFound, wordErr)

	digitFound, digitErr := regexp.MatchString(`\d+`, "hello")
	fmt.Println(digitFound, digitErr)
}

常用方法示例：

package main

import (
	"fmt"
	"regexp"
)

// main walks through the most common Find* variants on three patterns and
// prints every result.
func main() {
	p := regexp.MustCompile(`a.`)
	fmt.Println(p.Find([]byte("ababab")))
	fmt.Println(p.FindString("ababab"))
	fmt.Println(p.FindAllString("ababab", -1))
	fmt.Println(p.FindAllStringIndex("ababab", -1))

	// Compile reports a bad pattern through err. Surface it here instead of
	// discarding it, so a typo does not show up later as a nil dereference.
	q, err := regexp.Compile(`^a(.*)b$`)
	if err != nil {
		panic(err)
	}
	fmt.Println(q.FindAllSubmatch([]byte("ababab"), -1))
	fmt.Println(q.FindAllStringSubmatch("ababab", -1))
	fmt.Println(q.FindAllStringSubmatchIndex("ababab", -1))

	r := regexp.MustCompile(`(?m)(key\d+):\s+(value\d+)`)
	content := []byte(`
        # comment line
        key1: value1
        key2: value2
        key3: value3
    `)
	// First match only.
	fmt.Println(string(r.Find(content)))
	// Every whole match.
	for _, matched := range r.FindAll(content, -1) {
		fmt.Println(string(matched))
	}
	// Every match followed by its two capture groups.
	for _, multiMatched := range r.FindAllSubmatch(content, -1) {
		for _, matched := range multiMatched {
			fmt.Println(string(matched))
		}
	}
}

2 进阶用法

2.1 Split

Split方法返回对传入字符串以表达式为分割符的子串slice，第二个参数n指定最多返回的子串数，负数表示返回所有子串。

package main

import (
	"fmt"
	"regexp"
)

// main splits a sample string on every run of the letter "a" and prints the
// resulting pieces one per line.
func main() {
	sep := regexp.MustCompile(`a+`)
	pieces := sep.Split("heaallo woarld", -1)
	for i := range pieces {
		fmt.Println(pieces[i])
	}
}

2.2 Replace

如下代码，ReplaceAllString返回源字符串将匹配部分替换为字符串模板的拷贝，替换模板采用$符标识第几个替换组，如$1标识第一个子匹配组，$name标识名为name的命名子匹配组。

package main

import (
	"fmt"
	"regexp"
)

// main swaps two words by referring to named capture groups in the
// replacement template.
func main() {
	// A named group must be written (?P<name>...). Without the <name> part
	// MustCompile panics with "invalid named capture".
	p := regexp.MustCompile(`(?P<first>\w+)\s+(?P<second>\w+)`)
	// names[0] is the whole match and is always ""; groups start at index 1.
	names := p.SubexpNames()
	fmt.Println(p.ReplaceAllString("hello world",
		fmt.Sprintf("$%s $%s", names[2], names[1])))
}

2.3 Expand

Expand将匹配模板所匹配部分叠加至dst尾部并返回。

package main

import (
	"fmt"
	"regexp"
)

// main extracts key/value pairs from a JSON-like fragment and rewrites each
// one as "key=value" using Expand with a template of named group references.
func main() {
	content := []byte(`
	# json fragment
	"id": "dbsuye23sd83d8dasf7",
	"name": "Larry",
	"birth_year": 2000
	`)
	// The groups must be named "key" and "value" so that the $key and $value
	// references in the template resolve. A bare (?P...) does not compile.
	p := regexp.MustCompile(`(?m)"(?P<key>\w+)":\s+"?(?P<value>[a-zA-Z0-9]+)"?`)
	var dst []byte
	tpl := []byte("$key=$value\n")
	for _, submatches := range p.FindAllSubmatchIndex(content, -1) {
		// Expand appends the filled-in template to dst and returns the result.
		dst = p.Expand(dst, tpl, content, submatches)
	}
	fmt.Println(string(dst))
}

https://www.cnblogs.com/golove/p/3270918.html

Golang学习 - regexp 包

// 函数

// 判断在 b（s、r）中能否找到 pattern 所匹配的字符串
func Match(pattern string, b []byte) (matched bool, err error)
func MatchString(pattern string, s string) (matched bool, err error)
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)

// 将 s 中的正则表达式元字符转义成普通字符。
func QuoteMeta(s string) string

// 示例：MatchString、QuoteMeta
// main demonstrates the package-level MatchString and QuoteMeta helpers.
func main() {
	// Both values are raw string literals; the backticks are required.
	pat := `(((abc.)def.)ghi)`
	src := `abc-def-ghi abc+def+ghi`

	fmt.Println(regexp.MatchString(pat, src))
	// true <nil>

	fmt.Println(regexp.QuoteMeta(pat))
	// \(\(\(abc\.\)def\.\)ghi\)
}

// Regexp 代表一个编译好的正则表达式，我们这里称之为正则对象。正则对象可以
// 在文本中查找匹配的内容。
//
// Regexp 可以安全的在多个例程中并行使用。
type Regexp struct { … }

// 编译

// 将正则表达式编译成一个正则对象（使用 PERL 语法）。
// 该正则对象会采用“leftmost-first”模式。选择第一个匹配结果。
// 如果正则表达式语法错误，则返回错误信息。
func Compile(expr string) (*Regexp, error)

// 将正则表达式编译成一个正则对象（正则语法限制在 POSIX ERE 范围内）。
// 该正则对象会采用“leftmost-longest”模式。选择最长的匹配结果。
// POSIX 语法不支持 Perl 的语法格式：\d、\D、\s、\S、\w、\W
// 如果正则表达式语法错误，则返回错误信息。
func CompilePOSIX(expr string) (*Regexp, error)

// 功能同上，但会在解析失败时 panic
func MustCompile(str string) *Regexp
func MustCompilePOSIX(str string) *Regexp

// 让正则表达式在之后的搜索中都采用“leftmost-longest”模式。
func (re *Regexp) Longest()

// 返回编译时使用的正则表达式字符串
func (re *Regexp) String() string

// 返回正则表达式中分组的数量
func (re *Regexp) NumSubexp() int

// 返回正则表达式中分组的名字
// 第 0 个元素表示整个正则表达式的名字，永远是空字符串。
func (re *Regexp) SubexpNames() []string

// 返回正则表达式必须匹配到的字面前缀（不包含可变部分）。
// 如果整个正则表达式都是字面值，则 complete 返回 true。
func (re *Regexp) LiteralPrefix() (prefix string, complete bool)

// 示例：第一匹配和最长匹配
// main contrasts leftmost-first matching (MustCompile) with
// leftmost-longest matching (MustCompilePOSIX) on the same input.
func main() {
	b := []byte("abc1def1")
	pat := `abc1|abc1def1`
	reg1 := regexp.MustCompile(pat)      // leftmost-first
	reg2 := regexp.MustCompilePOSIX(pat) // leftmost-longest
	fmt.Printf("%s\n", reg1.Find(b))     // abc1
	fmt.Printf("%s\n", reg2.Find(b))     // abc1def1

	b = []byte("abc1def1")
	pat = `(abc|abc1def)*1`
	reg1 = regexp.MustCompile(pat)      // leftmost-first
	reg2 = regexp.MustCompilePOSIX(pat) // leftmost-longest
	fmt.Printf("%s\n", reg1.Find(b))    // abc1
	fmt.Printf("%s\n", reg2.Find(b))    // abc1def1
}

// 示例：正则信息
// main prints metadata of compiled patterns: the source string, the number
// of capture groups, the group names, and the literal prefix.
func main() {
	pat := `(abc)(def)(ghi)`
	reg := regexp.MustCompile(pat)

	// Source text of the pattern.
	fmt.Println(reg.String()) // (abc)(def)(ghi)

	// Number of capture groups.
	fmt.Println(reg.NumSubexp()) // 3

	fmt.Println()

	// Group names; index 0 stands for the whole match and is always "".
	pat = `(?P<Name1>abc)(def)(?P<Name3>ghi)`
	reg = regexp.MustCompile(pat)

	names := reg.SubexpNames()
	for i := 0; i <= reg.NumSubexp(); i++ {
		fmt.Printf("%d: %q\n", i, names[i])
	}
	// 0: ""
	// 1: "Name1"
	// 2: ""
	// 3: "Name3"

	fmt.Println()

	// Literal prefix that every match must start with.
	pat = `(abc1)(abc2)(abc3)`
	reg = regexp.MustCompile(pat)
	fmt.Println(reg.LiteralPrefix()) // abc1abc2abc3 true

	pat = `(abc1)|(abc2)|(abc3)`
	reg = regexp.MustCompile(pat)
	fmt.Println(reg.LiteralPrefix()) //  false

	pat = `abc1|abc2|abc3`
	reg = regexp.MustCompile(pat)
	fmt.Println(reg.LiteralPrefix()) // abc false
}

// 判断

// 判断在 b（s、r）中能否找到匹配的字符串
func (re *Regexp) Match(b []byte) bool
func (re *Regexp) MatchString(s string) bool
func (re *Regexp) MatchReader(r io.RuneReader) bool

// 查找

// 返回第一个匹配到的结果（结果以 b 的切片形式返回）。
func (re *Regexp) Find(b []byte) []byte

// 返回第一个匹配到的结果及其分组内容（结果以 b 的切片形式返回）。
// 返回值中的第 0 个元素是整个正则表达式的匹配结果，后续元素是各个分组的
// 匹配内容，分组顺序按照“(”的出现次序而定。
func (re *Regexp) FindSubmatch(b []byte) [][]byte

// 功能同 Find，只不过返回的是匹配结果的首尾下标，通过这些下标可以生成切片。
// loc[0] 是结果切片的起始下标，loc[1] 是结果切片的结束下标。
func (re *Regexp) FindIndex(b []byte) (loc []int)

// 功能同 FindSubmatch，只不过返回的是匹配结果的首尾下标，通过这些下标可以生成切片。
// loc[0] 是结果切片的起始下标，loc[1] 是结果切片的结束下标。
// loc[2] 是分组1切片的起始下标，loc[3] 是分组1切片的结束下标。
// loc[4] 是分组2切片的起始下标，loc[5] 是分组2切片的结束下标。
// 以此类推
func (re *Regexp) FindSubmatchIndex(b []byte) (loc []int)

// 示例：Find、FindSubmatch
// main prints the first match of a nested-group pattern, then the match
// together with each of its capture groups.
func main() {
	pat := `(((abc.)def.)ghi)`
	reg := regexp.MustCompile(pat)

	src := []byte(`abc-def-ghi abc+def+ghi`)

	// First match only.
	fmt.Printf("%s\n", reg.Find(src)) // abc-def-ghi

	fmt.Println()

	// First match plus its groups, ordered by opening parenthesis.
	for i, group := range reg.FindSubmatch(src) {
		fmt.Printf("%d: %s\n", i, group)
	}
	// 0: abc-def-ghi
	// 1: abc-def-ghi
	// 2: abc-def-
	// 3: abc-
}

// 示例：FindIndex、FindSubmatchIndex
// main shows how the index pairs returned by FindIndex and
// FindSubmatchIndex are used to slice the matched text out of src.
func main() {
	pat := `(((abc.)def.)ghi)`
	reg := regexp.MustCompile(pat)

	src := []byte(`abc-def-ghi abc+def+ghi`)

	// Location of the first match.
	matched := reg.FindIndex(src)
	fmt.Printf("%v\n", matched) // [0 11]
	m := matched[0]
	n := matched[1]
	fmt.Printf("%s\n\n", src[m:n]) // abc-def-ghi

	// Location of the first match and of each group: pair i occupies
	// matched[2*i] and matched[2*i+1].
	matched = reg.FindSubmatchIndex(src)
	fmt.Printf("%v\n", matched) // [0 11 0 11 0 8 0 4]
	for i := 0; i < len(matched)/2; i++ {
		start, end := matched[i*2], matched[i*2+1]
		fmt.Printf("%s\n", src[start:end])
	}
	// abc-def-ghi
	// abc-def-ghi
	// abc-def-
	// abc-
}

// 功能同上，只不过返回多个匹配的结果，而不只是第一个。
// n 是查找次数，负数表示不限次数。
func (re *Regexp) FindAll(b []byte, n int) [][]byte
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte

func (re *Regexp) FindAllIndex(b []byte, n int) [][]int
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int

// 示例：FindAll、FindAllSubmatch
// main prints every match of a nested-group pattern, then every match
// together with its capture groups.
func main() {
	pat := `(((abc.)def.)ghi)`
	reg := regexp.MustCompile(pat)

	s := []byte(`abc-def-ghi abc+def+ghi`)

	// All whole matches.
	for _, one := range reg.FindAll(s, -1) {
		fmt.Printf("%s\n", one)
	}
	// abc-def-ghi
	// abc+def+ghi

	// All matches with their groups; a blank line precedes each match.
	for _, one := range reg.FindAllSubmatch(s, -1) {
		fmt.Println()
		for j, group := range one {
			fmt.Printf("%d: %s\n", j, group)
		}
	}
	// 0: abc-def-ghi
	// 1: abc-def-ghi
	// 2: abc-def-
	// 3: abc-

	// 0: abc+def+ghi
	// 1: abc+def+ghi
	// 2: abc+def+
	// 3: abc+
}

// 功能同上，只不过在字符串中查找
func (re *Regexp) FindString(s string) string
func (re *Regexp) FindStringSubmatch(s string) []string

func (re *Regexp) FindStringIndex(s string) (loc []int)
func (re *Regexp) FindStringSubmatchIndex(s string) []int

func (re *Regexp) FindAllString(s string, n int) []string
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string

func (re *Regexp) FindAllStringIndex(s string, n int) [][]int
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int

// 功能同上，只不过在 io.RuneReader 中查找。
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int)
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int

// 替换（不会修改参数，结果是参数的副本）

// 将 src 中匹配的内容替换为 repl（repl 中可以使用 $1 $name 等分组引用符）。
func (re *Regexp) ReplaceAll(src, repl []byte) []byte

// 将 src 中匹配的内容经过 repl 函数处理后替换回去。
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte

// 将 src 中匹配的内容替换为 repl（repl 为字面值，不解析其中的 $1 $name 等）。
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte

// 功能同上，只不过在字符串中查找。
func (re *Regexp) ReplaceAllString(src, repl string) string
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string

// Expand 要配合 FindSubmatchIndex 一起使用。FindSubmatchIndex 在 src 中进行
// 查找，将结果存入 match 中。这样就可以通过 src 和 match 得到匹配的字符串。
// template 是替换内容，可以使用分组引用符 $1、$2、$name 等。Expand 将其中的分
// 组引用符替换为前面匹配到的字符串。然后追加到 dst 的尾部（dst 可以为空）。
// 说白了 Expand 就是一次替换过程，只不过需要 FindSubmatchIndex 的配合。
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte

// 功能同上，参数为字符串。
func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte

// 示例：Expand
// main fills a template with the groups of the first match, then with the
// groups of every match, using Expand on the index slices.
func main() {
	pat := `(((abc.)def.)ghi)`
	reg := regexp.MustCompile(pat)

	src := []byte(`abc-def-ghi abc+def+ghi`)
	template := []byte(`$0   $1   $2   $3`)

	// Expand the first match only.
	match := reg.FindSubmatchIndex(src)
	fmt.Printf("%v\n", match) // [0 11 0 11 0 8 0 4]
	dst := reg.Expand(nil, template, src, match)
	fmt.Printf("%s\n\n", dst)
	// abc-def-ghi   abc-def-ghi   abc-def-   abc-

	// Expand every match, each into a fresh buffer.
	for _, match := range reg.FindAllSubmatchIndex(src, -1) {
		fmt.Printf("%v\n", match)
		dst := reg.Expand(nil, template, src, match)
		fmt.Printf("%s\n", dst)
	}
	// [0 11 0 11 0 8 0 4]
	// abc-def-ghi   abc-def-ghi   abc-def-   abc-
	// [12 23 12 23 12 20 12 16]
	// abc+def+ghi   abc+def+ghi   abc+def+   abc+
}

// 其它

// 以 s 中的匹配结果作为分割符将 s 分割成字符串列表。
// n 是分割次数，负数表示不限次数。
func (re *Regexp) Split(s string, n int) []string

// 将当前正则对象复制一份。在旧版本中，给每个例程分配一个正则对象的副本，
// 可以避免多例程对单个正则对象的争夺锁定。
// 注意：自 Go 1.12 起 Copy 已被弃用（Deprecated），不再需要通过 Copy 来避免锁争夺。
func (re *Regexp) Copy() *Regexp

regexp 包中的函数和方法

// regexp.go

// 判断在 b 中能否找到正则表达式 pattern 所匹配的子串
// pattern：要查找的正则表达式
// b：要在其中进行查找的 []byte
// matched：返回是否找到匹配项
// err：返回查找过程中遇到的任何错误
// 此函数通过调用 Regexp 的方法实现
func Match(pattern string, b []byte) (matched bool, err error)

// main checks whether a byte slice contains a match for the pattern.
func main() {
	fmt.Println(regexp.Match("H.* ", []byte("Hello World!")))
	// true <nil>
}

// 判断在 r 中能否找到正则表达式 pattern 所匹配的子串
// pattern：要查找的正则表达式
// r：要在其中进行查找的 RuneReader 接口
// matched：返回是否找到匹配项
// err：返回查找过程中遇到的任何错误
// 此函数通过调用 Regexp 的方法实现
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)

// main checks whether text read from a RuneReader contains a match.
func main() {
	r := bytes.NewReader([]byte("Hello World!"))
	fmt.Println(regexp.MatchReader("H.* ", r))
	// true <nil>
}

// 判断在 s 中能否找到正则表达式 pattern 所匹配的子串
// pattern：要查找的正则表达式
// s：要在其中进行查找的字符串
// matched：返回是否找到匹配项
// err：返回查找过程中遇到的任何错误
// 此函数通过调用 Regexp 的方法实现
func MatchString(pattern string, s string) (matched bool, err error)

// main checks whether a string contains a match for the pattern.
func main() {
	// MatchString is the string variant; regexp.Match only accepts []byte.
	fmt.Println(regexp.MatchString("H.* ", "Hello World!"))
	// true <nil>
}

// QuoteMeta 将字符串 s 中的“特殊字符”转换为其“转义格式”
// 例如，QuoteMeta（[foo]）返回\[foo\]。
// 特殊字符有：\.+*?()|[]{}^$
// 这些字符用于实现正则语法，所以当作普通字符使用时需要转换
func QuoteMeta(s string) string

// main prints the result of escaping a string with QuoteMeta.
func main() {
fmt.Println(regexp.QuoteMeta("(?P:Hello) [a-z]"))
// \(\?P:Hello\) \[a-z\]
}

// Regexp 结构表示一个编译后的正则表达式
// Regexp 的公开接口都是通过方法实现的
// 多个 goroutine 并发使用一个 Regexp 是安全的
type Regexp struct {
// 私有字段
}

// 通过 Compile、CompilePOSIX、MustCompile、MustCompilePOSIX
// 四个函数可以创建一个 Regexp 对象

// Compile 用来解析正则表达式 expr 是否合法，如果合法，则返回一个 Regexp 对象
// Regexp 对象可以在任意文本上执行需要的操作
func Compile(expr string) (*Regexp, error)

// main compiles a pattern with Compile and prints the first match together
// with the compile error.
func main() {
	reg, err := regexp.Compile(`\w+`)
	fmt.Printf("%q,%v\n", reg.FindString("Hello World!"), err)
	// "Hello",<nil>
}

// CompilePOSIX 的作用和 Compile 一样
// 不同的是，CompilePOSIX 使用 POSIX 语法，
// 同时，它采用最左最长方式搜索，
// 而 Compile 采用最左优先（leftmost-first）方式搜索
// POSIX 语法不支持 Perl 的语法格式：\d、\D、\s、\S、\w、\W
func CompilePOSIX(expr string) (*Regexp, error)

// main compiles a POSIX ERE pattern and prints the first match together
// with the compile error.
func main() {
	reg, err := regexp.CompilePOSIX(`[[:word:]]+`)
	fmt.Printf("%q,%v\n", reg.FindString("Hello World!"), err)
	// "Hello",<nil>
}

// MustCompile 的作用和 Compile 一样
// 不同的是，当正则表达式 str 不合法时，MustCompile 会抛出异常
// 而 Compile 仅返回一个 error 值
func MustCompile(str string) *Regexp

// main compiles a pattern with MustCompile and prints the first match.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindString("Hello World!"))
	// Hello
}

// MustCompilePOSIX 的作用和 CompilePOSIX 一样
// 不同的是，当正则表达式 str 不合法时，MustCompilePOSIX 会抛出异常
// 而 CompilePOSIX 仅返回一个 error 值
func MustCompilePOSIX(str string) *Regexp

// main compiles a POSIX pattern with MustCompilePOSIX and prints the first
// match.
func main() {
	// The pattern ends with a literal space. That space pins the match to
	// "Hello "; without it ".+" would consume the whole input.
	reg := regexp.MustCompilePOSIX(`[[:word:]].+ `)
	fmt.Printf("%q\n", reg.FindString("Hello World!"))
	// "Hello "
}

// 在 b 中查找 re 中编译好的正则表达式，并返回第一个匹配的内容
func (re *Regexp) Find(b []byte) []byte

// main prints the first match found in a byte slice.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Printf("%q", reg.Find([]byte("Hello World!")))
	// "Hello"
}

// 在 s 中查找 re 中编译好的正则表达式，并返回第一个匹配的内容
func (re *Regexp) FindString(s string) string

// main prints the first match found in a string.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindString("Hello World!"))
	// Hello
}

// 在 b 中查找 re 中编译好的正则表达式，并返回所有匹配的内容
// {{匹配项}, {匹配项}, …}
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAll(b []byte, n int) [][]byte

// main prints every match found in a byte slice.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Printf("%q", reg.FindAll([]byte("Hello World!"), -1))
	// ["Hello" "World"]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回所有匹配的内容
// {匹配项, 匹配项, …}
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAllString(s string, n int) []string

// main prints every match found in a string.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Printf("%q", reg.FindAllString("Hello World!", -1))
	// ["Hello" "World"]
}

// 在 b 中查找 re 中编译好的正则表达式，并返回第一个匹配的位置
// {起始位置, 结束位置}
func (re *Regexp) FindIndex(b []byte) (loc []int)

// main prints the start and end offsets of the first match in a byte slice.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindIndex([]byte("Hello World!")))
	// [0 5]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回第一个匹配的位置
// {起始位置, 结束位置}
func (re *Regexp) FindStringIndex(s string) (loc []int)

// main prints the start and end offsets of the first match in a string.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindStringIndex("Hello World!"))
	// [0 5]
}

// 在 r 中查找 re 中编译好的正则表达式，并返回第一个匹配的位置
// {起始位置, 结束位置}
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int)

// main prints the start and end offsets of the first match in text read
// from a RuneReader.
func main() {
	r := bytes.NewReader([]byte("Hello World!"))
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindReaderIndex(r))
	// [0 5]
}

// 在 b 中查找 re 中编译好的正则表达式，并返回所有匹配的位置
// {{起始位置, 结束位置}, {起始位置, 结束位置}, …}
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int

// main prints the offsets of every match in a byte slice.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindAllIndex([]byte("Hello World!"), -1))
	// [[0 5] [6 11]]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回所有匹配的位置
// {{起始位置, 结束位置}, {起始位置, 结束位置}, …}
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int

// main prints the offsets of every match in a string.
func main() {
	reg := regexp.MustCompile(`\w+`)
	fmt.Println(reg.FindAllStringIndex("Hello World!", -1))
	// [[0 5] [6 11]]
}

// 在 b 中查找 re 中编译好的正则表达式，并返回第一个匹配的内容
// 同时返回子表达式匹配的内容
// {{完整匹配项}, {子匹配项}, {子匹配项}, …}
func (re *Regexp) FindSubmatch(b []byte) [][]byte

// main prints the first match and its capture groups from a byte slice.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Printf("%q", reg.FindSubmatch([]byte("Hello World!")))
	// ["Hello" "H" "o"]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回第一个匹配的内容
// 同时返回子表达式匹配的内容
// {完整匹配项, 子匹配项, 子匹配项, …}
func (re *Regexp) FindStringSubmatch(s string) []string

// main prints the first match and its capture groups from a string.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Printf("%q", reg.FindStringSubmatch("Hello World!"))
	// ["Hello" "H" "o"]
}

// 在 b 中查找 re 中编译好的正则表达式，并返回所有匹配的内容
// 同时返回子表达式匹配的内容
// {
// {{完整匹配项}, {子匹配项}, {子匹配项}, …},
// {{完整匹配项}, {子匹配项}, {子匹配项}, …},
// …
// }
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte

// main prints every match and its capture groups from a byte slice.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Printf("%q", reg.FindAllSubmatch([]byte("Hello World!"), -1))
	// [["Hello" "H" "o"] ["World" "W" "d"]]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回所有匹配的内容
// 同时返回子表达式匹配的内容
// {
// {完整匹配项, 子匹配项, 子匹配项, …},
// {完整匹配项, 子匹配项, 子匹配项, …},
// …
// }
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string

// main prints every match and its capture groups from a string.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Printf("%q", reg.FindAllStringSubmatch("Hello World!", -1))
	// [["Hello" "H" "o"] ["World" "W" "d"]]
}

// 在 b 中查找 re 中编译好的正则表达式，并返回第一个匹配的位置
// 同时返回子表达式匹配的位置
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …}
func (re *Regexp) FindSubmatchIndex(b []byte) []int

// main prints the offsets of the first match and of its capture groups in a
// byte slice.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Println(reg.FindSubmatchIndex([]byte("Hello World!")))
	// [0 5 0 1 4 5]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回第一个匹配的位置
// 同时返回子表达式匹配的位置
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …}
func (re *Regexp) FindStringSubmatchIndex(s string) []int

// main prints the offsets of the first match and of its capture groups in a
// string.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Println(reg.FindStringSubmatchIndex("Hello World!"))
	// [0 5 0 1 4 5]
}

// 在 r 中查找 re 中编译好的正则表达式，并返回第一个匹配的位置
// 同时返回子表达式匹配的位置
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …}
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int

// main prints the offsets of the first match and of its capture groups in
// text read from a RuneReader.
func main() {
	r := bytes.NewReader([]byte("Hello World!"))
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Println(reg.FindReaderSubmatchIndex(r))
	// [0 5 0 1 4 5]
}

// 在 b 中查找 re 中编译好的正则表达式，并返回所有匹配的位置
// 同时返回子表达式匹配的位置
// {
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …},
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …},
// …
// }
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int

// main prints, for every match in a byte slice, the offsets of the match
// and of its capture groups.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Println(reg.FindAllSubmatchIndex([]byte("Hello World!"), -1))
	// [[0 5 0 1 4 5] [6 11 6 7 10 11]]
}

// 在 s 中查找 re 中编译好的正则表达式，并返回所有匹配的位置
// 同时返回子表达式匹配的位置
// {
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …},
// {完整项起始, 完整项结束, 子项起始, 子项结束, 子项起始, 子项结束, …},
// …
// }
// 只查找前 n 个匹配项，如果 n < 0，则查找所有匹配项
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int

// main prints, for every match in a string, the offsets of the match and of
// its capture groups.
func main() {
	reg := regexp.MustCompile(`(\w)(\w)+`)
	fmt.Println(reg.FindAllStringSubmatchIndex("Hello World!", -1))
	// [[0 5 0 1 4 5] [6 11 6 7 10 11]]
}

// 将 template 的内容经过处理后，追加到 dst 的尾部。
// template 中要有 $1、$2、${name1}、${name2} 这样的“分组引用符”
// match 是由 FindSubmatchIndex 方法返回的结果，里面存放了各个分组的位置信息
// 如果 template 中有“分组引用符”，则以 match 为标准，
// 在 src 中取出相应的子串，替换掉 template 中的 $1、$2 等引用符号。
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte

// main fills a byte-slice template from the groups of the first match and
// appends the result to an existing prefix.
func main() {
	reg := regexp.MustCompile(`(\w+),(\w+)`)
	src := []byte("Golang,World!")           // source text
	dst := []byte("Say: ")                   // destination prefix
	template := []byte("Hello $1, Hello $2") // template
	match := reg.FindSubmatchIndex(src)      // locate the groups in src
	// Fill in the template and append it to dst.
	fmt.Printf("%q", reg.Expand(dst, template, src, match))
	// "Say: Hello Golang, Hello World"
}

// 功能同 Expand 一样，只不过参数换成了 string 类型
func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte

// main fills a string template from the groups of the first match and
// appends the result to an existing prefix.
func main() {
	reg := regexp.MustCompile(`(\w+),(\w+)`)
	src := "Golang,World!"                    // source text
	dst := []byte("Say: ")                    // destination prefix (writable)
	template := "Hello $1, Hello $2"          // template
	match := reg.FindStringSubmatchIndex(src) // locate the groups in src
	// Fill in the template and append it to dst.
	fmt.Printf("%q", reg.ExpandString(dst, template, src, match))
	// "Say: Hello Golang, Hello World"
}

// LiteralPrefix 返回所有匹配项都共同拥有的前缀（去除可变元素）
// prefix：共同拥有的前缀
// complete：如果 prefix 就是正则表达式本身，则返回 true，否则返回 false
func (re *Regexp) LiteralPrefix() (prefix string, complete bool)

// main prints the literal prefix of two patterns: one with a variable tail
// and one that is entirely literal.
func main() {
	reg := regexp.MustCompile(`Hello[\w\s]+`)
	fmt.Println(reg.LiteralPrefix())
	// Hello false
	reg = regexp.MustCompile(`Hello`)
	fmt.Println(reg.LiteralPrefix())
	// Hello true
}

// 切换到“最左最长”（leftmost-longest）匹配模式
func (re *Regexp) Longest()

// main shows the effect of Longest: the same ungreedy pattern returns the
// shortest match before the call and the longest match after it.
func main() {
	text := `Hello World, 123 Go!`
	pattern := `(?U)H[\w\s]+o` // the (?U) flag makes the repetition ungreedy
	reg := regexp.MustCompile(pattern)
	fmt.Printf("%q\n", reg.FindString(text))
	// "Hello"
	reg.Longest() // switch to leftmost-longest matching
	fmt.Printf("%q\n", reg.FindString(text))
	// "Hello Wo"
}

// 判断在 b 中能否找到匹配项
func (re *Regexp) Match(b []byte) bool

// main reports whether two patterns match a byte slice.
func main() {
	b := []byte(`Hello World`)
	reg := regexp.MustCompile(`Hello\w+`)
	fmt.Println(reg.Match(b))
	// false
	reg = regexp.MustCompile(`Hello[\w\s]+`)
	fmt.Println(reg.Match(b))
	// true
}

// 判断在 r 中能否找到匹配项
func (re *Regexp) MatchReader(r io.RuneReader) bool

// main reports whether two patterns match text read from a RuneReader.
func main() {
	r := bytes.NewReader([]byte(`Hello World`))
	reg := regexp.MustCompile(`Hello\w+`)
	fmt.Println(reg.MatchReader(r))
	// false
	// Rewind: the first MatchReader call consumed runes from the reader.
	r.Seek(0, 0)
	reg = regexp.MustCompile(`Hello[\w\s]+`)
	fmt.Println(reg.MatchReader(r))
	// true
}

// 判断在 s 中能否找到匹配项
func (re *Regexp) MatchString(s string) bool

// main reports whether two patterns match a string.
func main() {
	s := `Hello World`
	reg := regexp.MustCompile(`Hello\w+`)
	fmt.Println(reg.MatchString(s))
	// false
	reg = regexp.MustCompile(`Hello[\w\s]+`)
	fmt.Println(reg.MatchString(s))
	// true
}

// 统计正则表达式中的分组个数（不包括“非捕获的分组”）
func (re *Regexp) NumSubexp() int

// main prints the number of capturing groups; the non-capturing (?:...)
// group is not counted.
func main() {
	reg := regexp.MustCompile(`(?U)(?:Hello)(\s+)(\w+)`)
	fmt.Println(reg.NumSubexp())
	// 2
}

// 在 src 中搜索匹配项，并替换为 repl 指定的内容
// 全部替换，并返回替换后的结果
func (re *Regexp) ReplaceAll(src, repl []byte) []byte

// main replaces every match in a byte slice, expanding the ${1} group
// reference in the replacement.
func main() {
	b := []byte("Hello World, 123 Go!")
	reg := regexp.MustCompile(`(Hell|G)o`)
	rep := []byte("${1}ooo")
	fmt.Printf("%q\n", reg.ReplaceAll(b, rep))
	// "Hellooo World, 123 Gooo!"
}

// 在 src 中搜索匹配项，并替换为 repl 指定的内容
// 全部替换，并返回替换后的结果
func (re *Regexp) ReplaceAllString(src, repl string) string

// main replaces every match in a string, expanding the ${1} group reference
// in the replacement.
func main() {
	s := "Hello World, 123 Go!"
	reg := regexp.MustCompile(`(Hell|G)o`)
	rep := "${1}ooo"
	fmt.Printf("%q\n", reg.ReplaceAllString(s, rep))
	// "Hellooo World, 123 Gooo!"
}

// 在 src 中搜索匹配项，并替换为 repl 指定的内容
// 如果 repl 中有“分组引用符”（$1、$name），则将“分组引用符”当普通字符处理
// 全部替换，并返回替换后的结果
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte

// main replaces every match in a byte slice with a literal replacement;
// the ${1} text is inserted verbatim instead of being expanded.
func main() {
	b := []byte("Hello World, 123 Go!")
	reg := regexp.MustCompile(`(Hell|G)o`)
	rep := []byte("${1}ooo")
	fmt.Printf("%q\n", reg.ReplaceAllLiteral(b, rep))
	// "${1}ooo World, 123 ${1}ooo!"
}

// 在 src 中搜索匹配项，并替换为 repl 指定的内容
// 如果 repl 中有“分组引用符”（$1、$name），则将“分组引用符”当普通字符处理
// 全部替换，并返回替换后的结果
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string

// main replaces every match in a string with a literal replacement; the
// ${1} text is inserted verbatim instead of being expanded.
func main() {
	s := "Hello World, 123 Go!"
	reg := regexp.MustCompile(`(Hell|G)o`)
	rep := "${1}ooo"
	fmt.Printf("%q\n", reg.ReplaceAllLiteralString(s, rep))
	// "${1}ooo World, 123 ${1}ooo!"
}

// 在 src 中搜索匹配项，然后将匹配的内容经过 repl 处理后，替换 src 中的匹配项
// 如果 repl 的返回值中有“分组引用符”（$1、$name），则将“分组引用符”当普通字符处理
// 全部替换，并返回替换后的结果
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte

// main contrasts ReplaceAll, which expands group references in the
// replacement, with ReplaceAllFunc, whose callback result is used verbatim.
func main() {
	s := []byte("Hello World!")
	reg := regexp.MustCompile("(H)ello")
	rep := []byte("$0$1")
	fmt.Printf("%s\n", reg.ReplaceAll(s, rep))
	// HelloH World!

	fmt.Printf("%s\n", reg.ReplaceAllFunc(s,
		func(b []byte) []byte {
			// Build the result in a fresh buffer so that appending never
			// writes into the backing array of the source slice.
			rst := make([]byte, 0, len(b)+len("$1"))
			rst = append(rst, b...)
			rst = append(rst, "$1"...)
			return rst
		}))
	// Hello$1 World!
}
k

// 在 src 中搜索匹配项，然后将匹配的内容经过 repl 处理后，替换 src 中的匹配项
// 如果 repl 的返回值中有“分组引用符”（$1、$name），则将“分组引用符”当普通字符处理
// 全部替换，并返回替换后的结果
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string

// main contrasts ReplaceAllString, which expands group references in the
// replacement, with ReplaceAllStringFunc, whose callback result is used
// verbatim.
func main() {
	s := "Hello World!"
	reg := regexp.MustCompile("(H)ello")
	rep := "$0$1"
	fmt.Printf("%s\n", reg.ReplaceAllString(s, rep))
	// HelloH World!
	fmt.Printf("%s\n", reg.ReplaceAllStringFunc(s,
		func(b string) string {
			return b + "$1"
		}))
	// Hello$1 World!
}

// 在 s 中搜索匹配项，并以匹配项为分割符，将 s 分割成多个子串
// 最多分割出 n 个子串，第 n 个子串不再进行分割
// 如果 n < 0，则分割所有子串
// 返回分割后的子串列表
func (re *Regexp) Split(s string, n int) []string

// main splits a string on every single whitespace character.
func main() {
	s := "Hello World\tHello\nGolang"
	reg := regexp.MustCompile(`\s`)
	fmt.Printf("%q\n", reg.Split(s, -1))
	// ["Hello" "World" "Hello" "Golang"]
}

// 返回 re 中的“正则表达式”字符串
func (re *Regexp) String() string

// main prints the source text the regular expression was compiled from.
func main() {
	re := regexp.MustCompile("Hello.*$")
	fmt.Printf("%s\n", re.String())
	// Hello.*$
}

// 返回 re 中的分组名称列表，未命名的分组返回空字符串
// 返回值[0] 为整个正则表达式的名称
// 返回值[1] 是分组 1 的名称
// 返回值[2] 是分组 2 的名称
// ……
func (re *Regexp) SubexpNames() []string

// main prints the capture-group names of a pattern with one named and one
// unnamed group.
func main() {
	// The group name goes between angle brackets: (?P<Name1>...).
	re := regexp.MustCompile("(?P<Name1>Hello) (World)")
	fmt.Printf("%q\n", re.SubexpNames())
	// ["" "Name1" ""]
}

go-regexp

Golang学习 - regexp 包

regexp 包中的函数和方法

fmt.Printf("%s\n", reg.ReplaceAllFunc(s,
func(b []byte) []byte {
rst := []byte{}
rst = append(rst, b…)
rst = append(rst, “$1”…)
return rst
}))
// Hello$1 World!
}
k

相关阅读

相关文章

相关问答

相关文档

go-regexp

Golang学习 - regexp 包

regexp 包中的函数和方法

fmt.Printf("%s\n", reg.ReplaceAllFunc(s, func(b []byte) []byte { rst := []byte{} rst = append(rst, b…) rst = append(rst, “$1”…) return rst })) // Hello$1 World! } k

相关阅读

相关文章

相关问答

相关文档

fmt.Printf("%s\n", reg.ReplaceAllFunc(s,
func(b []byte) []byte {
rst := []byte{}
rst = append(rst, b…)
rst = append(rst, “$1”…)
return rst
}))
// Hello$1 World!
}
k