字符串查找替换

查找子串

chapter_3_5-1.awk

# Demonstrates locating a substring with index() and match().
# The main rule runs once per input record; the record itself is not used.
BEGIN{
}
{
    content = "That's a dog, in the room, who is go the google to search 101 dog.";

    # On success both functions return the 1-based position of the first
    # occurrence inside the searched string.
    print("index: " index(content, "dog"));
    print("match: " match(content, "dog"));

    # Both return 0 when nothing is found.
    print("index: " index(content, "man"));
    print("match: " match(content, "man"));

    # Looking for punctuation.
    # index() only searches for a literal string: gawk rejects a regexp
    # constant as its second argument, and awks that accept one evaluate it
    # as `$0 ~ /re/` and then search for the resulting "0" or "1".
    # So index() is given one concrete punctuation character here, while
    # match() takes a real regular expression. Commas inside a bracket
    # expression are literal members, not separators, hence [[:punct:]].
    print("index: " index(content, ","));
    print("match: " match(content, /[[:punct:]]/));
}
END{
}

$echo ""|awk -f chapter_3_5-1.awk
index: 10
match: 10
index: 0
match: 0
index: 13
match: 5

index()函数原型:

index(s, t)
s 待查找字符串
t 目标子串(普通字符串,不支持正则表达式)
返回第一次匹配成功的索引位置,失败时返回0

match 函数原型:

match(s, r [, a])
s 待查找字符串
r 查询的正则表达式
a 结果二维数组,可选参数,如果匹配成功保存第一个匹配到字符串的相关信息,a[0,"start"]: 首个匹配成功子串的开始位置,a[0,"length"]: 首个匹配成功子串的长度,a[0]: 首次匹配成功的子字符串,匹配失败时数组为空
返回第一次匹配成功的索引位置,RSTART设置成第一次匹配成功的索引位置,RLENGTH设置成匹配成功子串的长度(匹配失败为-1)

chapter_3_5-2.awk

# Demonstrates match() with the optional third (array) argument, which is a
# gawk extension, together with the RSTART and RLENGTH variables it sets.
# The main rule runs once per input record; the record itself is not used.
BEGIN{
}
{
    content = "That's a dog, in the room, the dark room, who is go the google to search 101 dog.";

    # Four-character match: 'r' or 'g', any two characters, then 'e' or 'm'.
    # Bracket expressions list their members directly; a comma inside the
    # brackets would itself be a member, so none is written here.
    # match() returns the start position of the first match (0 if none).
    print(match(content, /[rg]..[em]/, array));

    # Start position of the first match
    print("RSTART: " RSTART);
    # Length of the first matched substring
    print("RLENGTH: " RLENGTH);

    print("");
    for (key in array) {
        # The result array holds details of the first match. Two-part
        # subscripts such as [0, "start"] are stored as a single string
        # joined by SUBSEP, so split the key to recover each part.
        # Note: the order in which for-in visits the keys is not specified.
        len = split(key, keys, SUBSEP);
        for (i = 1; i <= len; ++i) {
            print("key["i"]: " keys[i]);
        }
        print("value: " array[key]);
    }

    # array[0] holds the matched text; only meaningful when a match was found.
    if (RSTART > 0) {
        print("match string: " array[0]);
    }
}
END{
}

$echo ""|awk -f chapter_3_5-2.awk
22
RSTART: 22
RLENGTH: 4

key[1]: 0
key[2]: start
value: 22
key[1]: 0
key[2]: length
value: 4
key[1]: 0
value: room
match string: room

match()可以看作是增强版的index(),支持正则表达式,并且返回的内容更多。

替换子串

chapter_3_5-3.awk

# Demonstrates substring replacement with sub(), gsub() and gensub()
# (gensub() is a gawk extension). The main rule runs once per input record;
# the record itself is not used.

# Print the outcome of an in-place substitution: the number of replacements
# made and the string as it looks afterwards.
function show_in_place(count, text) {
    print("replace num: " count);
    print("after replace: " text);
}

# Print the source string next to the string produced from it.
function show_copy(source, result) {
    print("content: " source);
    print("str: " result);
}

BEGIN{
}
{
    content = "That's a dog, in the room, the dark room, who is go the google to search 101 dog.";

    # sub(): replace only the first match, modifying the working copy.
    print("");
    once = content;
    hits = sub("dog", "cat", once);
    show_in_place(hits, once);

    # gsub(): replace every match, modifying the working copy.
    print("");
    every = content;
    hits = gsub("dog", "cat", every);
    show_in_place(hits, every);

    # gensub() with "g": replace every match; the result is returned and
    # the source string is printed alongside it for comparison.
    print("");
    show_copy(content, gensub("dog", "cat", "g", content));

    # gensub() with a number: replace only that occurrence (here the second).
    print("");
    show_copy(content, gensub("dog", "cat", 2, content));
}
END{
}

$echo ""|awk -f chapter_3_5-3.awk

replace num: 1
after replace: That's a cat, in the room, the dark room, who is go the google to search 101 dog.

replace num: 2
after replace: That's a cat, in the room, the dark room, who is go the google to search 101 cat.

content: That's a dog, in the room, the dark room, who is go the google to search 101 dog.
str: That's a cat, in the room, the dark room, who is go the google to search 101 cat.

content: That's a dog, in the room, the dark room, who is go the google to search 101 dog.
str: That's a dog, in the room, the dark room, who is go the google to search 101 cat.

sub()和gsub()参数用法完全相同,唯一的区别是sub()只替换首次匹配到的子串,gsub()替换所有匹配到的子串;而gensub()更为灵活,可以指定替换第几次出现的子串或是全局替换,并且不会改变原字符串的值(替换结果通过返回值给出)。函数原型如下:

sub(r, s [, t])
gsub(r, s [, t])
r 用于匹配的正则表达式
s 要替换的字符串值
t 目标字符串,可选参数,默认使用$0
返回,成功替换子串的数目

gensub(r, s, h [, t])
r 用于匹配的正则表达式
s 要替换的字符串值
h "g"用来全局替换,或是用数字指定要替换第几次出现的子串
t 目标字符串,可选参数,默认使用$0
返回替换后的新字符串,原字符串保持不变

results matching ""

    No results matching ""