Это можно сделать с помощью хитроумного регулярного выражения, а можно — с помощью awk: разобрать поля, разбить их по соответствующим разделителям и собрать заново в нужном виде:
# Rewrites the second tab-separated field of each record from
#     key=value|key=value|...
# into
#     "key"="value","key"="value",...
# and prints it after the first field. Records with fewer than two
# fields are not printed.

BEGIN {
    # Assume the two fields are tab-delimited; use a tab on output as well.
    FS = "\t"
    OFS = FS
}

NF > 1 {
    # Break the 2nd field into its "|"-separated pairs.
    count = split($2, pairs, "|")

    # Accumulate the rewritten pairs here, comma-separated.
    joined = ""

    for (k = 1; k <= count; k++) {
        # Every pair must split into exactly two parts on "=".
        if (split(pairs[k], kv, "=") != 2) {
            # Malformed pair: echo the untouched input record, report
            # where the problem is, and stop with a failure status.
            print >"/dev/stderr"
            printf("Error in field %d on line %d\n", k, FNR) >"/dev/stderr"
            exit 1
        }

        # Quote both halves and glue them back together around "=".
        quoted = sprintf("\"%s\"=\"%s\"", kv[1], kv[2])

        if (k == 1)
            joined = quoted
        else
            joined = joined "," quoted
    }

    # Emit the first field and the rebuilt second field, joined by OFS.
    print $1, joined
}
Тестирование:
$ awk -f script.awk file
1503668542862176 "manager"="10001","Bounced"="999","Analyst"="10004","Business Analyst"="10005","Programmer"="10003"
1552024948590636 "manager"="10001","Bounced"="999","Analyst"="10004"
1551728916565460 "Bounced"="999","Analyst"="10004"
1553617087089790 "Analyst"="10004"
1538058487418963 "manager"="10001","Architect"="10002","Analyst"="10004"