sort -n
— это то, что вам нужно: он сортирует значения как числа, а не как строки. Например: sort -k 2 -n file1
Упрощенный сценарий для достижения желаемого результата:
#!/usr/bin/env bash
# Print "sid,msg,classtype,created_at,updated_at" for every non-blank line of
# the file given as the first argument.
#
# Usage: ./scriptName fileName
#
# Assumptions: the file name is always passed, and points to a valid file,
# hence no error handling has been implemented (for script simplicity).
# Strict mode (set -euo pipefail) is intentionally not enabled: grep exits
# non-zero when a field is absent from a line, and such a line should still
# be printed with an empty column instead of aborting the script.

# The first argument to the script is the file name.
filename="$1"

# Read one line at a time, extracting the required fields.
# - IFS is left at its default so that `read` trims surrounding whitespace,
#   which makes whitespace-only lines count as blank.
# - `|| [[ -n $line ]]` keeps a final line that has no trailing newline;
#   `read` returns non-zero for it but still fills in the variable.
while read -r line || [[ -n $line ]]; do
  # skip blank lines
  [[ -n $line ]] || continue

  # Each field: isolate the "key...value" fragment with grep -o, then let awk
  # pick the value part. Here-strings feed the line verbatim (no echo option
  # parsing).
  sid=$(grep -o 'sid[^;]*' <<<"$line" | awk -F ':' '{print $2}')
  msg=$(grep -o 'msg:[^;]*' <<<"$line" | awk -F '"' '{print $2}')
  classType=$(grep -o 'classtype:[^;]*' <<<"$line" | awk -F ':' '{print $2}')
  cDate=$(grep -o "created_at[^,]*" <<<"$line" | awk '{print $2}')
  # The bracket expression below excludes both ';' and the single quote.
  uDate=$(grep -o "updated_at[^';']*" <<<"$line" | awk '{print $2}')

  printf '%s\n' "$sid,$msg,$classType,$cDate,$uDate"
done < "$filename"
Запуск сценария:
./scriptName fileName
Вывод:
2008298,ET CHAT GaduGadu Chat Client Login Packet,policy-violation,2010_07_30,2010_07_30
2020661,ET EXPLOIT FREAK Weak Export Suite From Server (CVE-2015-0204),bad-unknown,2015_03_10,2015_03_10
2008302,ET CHAT GaduGadu Chat Send Message,policy-violation,2010_07_30,2010_07_30
2008303,ET CHAT GaduGadu Chat Receive Message,policy-violation,2010_07_30,2010_07_30
2008304,ET CHAT GaduGadu Chat Keepalive PING,policy-violation,2010_07_30,2010_07_30
2022972,ET EXPLOIT CVE-2016-0189 Common Construct M2,attempted-user,2016_07_15,2016_07_15
Вот общий подход к решению этой задачи с помощью GNU awk (используется переменная FPAT):
$ cat tst.awk
# Split every input record into "tag:value" fields and print the sid, msg,
# classtype and the created_at/updated_at metadata values as one CSV line.
# Requires GNU awk: FPAT is a gawk extension.
BEGIN {
    # A field is a tag (alphanumerics or underscore), a colon, and then either
    # a double-quoted string or everything up to the next semicolon.
    FPAT = "[[:alnum:]_]+:(\"[^\"]+\"|[^;]+)"
    OFS = ","
}
{
    # f maps tag -> value for the current record only.
    delete f
    for (i = 1; i <= NF; i++) {
        tag = val = $i
        sub(/:.*/, "", tag)       # keep only the text before the first colon
        sub(/[^:]+:/, "", val)    # drop the leading "tag:"
        gsub(/"/, "", val)        # strip double quotes from the value
        f[tag] = val
        if (tag == "metadata") {
            # metadata holds comma-separated "name value" pairs; store each
            # one under the key "metadata:<name>".
            numSubFlds = split(val, md, /, */)
            for (j = 1; j <= numSubFlds; j++) {
                subTag = subVal = md[j]
                # Remove from the first space onward so only the name remains.
                # (The pattern must start with a space: a bare /.*/ would
                # erase the whole name and every pair would collide on
                # the key "metadata:".)
                sub(/ .*/, "", subTag)
                sub(/[^ ]+ /, "", subVal)   # drop "name " to leave the value
                f[tag ":" subTag] = subVal
            }
        }
    }
    # uncomment this to see all tags and values
    # for (idx in f) { print idx "=" f[idx] }
    # print
    print f["sid"], f["msg"], f["classtype"], f["metadata:created_at"], f["metadata:updated_at"]
}
.
$ gawk -f tst.awk file
2008298,ET CHAT GaduGadu Chat Client Login Packet,policy-violation,2010_07_30,2010_07_30
2020661,,bad-unknown,2015_03_10,2015_03_10
2008302,ET CHAT GaduGadu Chat Send Message,policy-violation,2010_07_30,2010_07_30
2008303,ET CHAT GaduGadu Chat Receive Message,policy-violation,2010_07_30,2010_07_30
2008304,ET CHAT GaduGadu Chat Keepalive PING,policy-violation,2010_07_30,2010_07_30
2022972,ET EXPLOIT CVE-2016-0189 Common Construct M2,attempted-user,2016_07_15,2016_07_15
Похоже, что ваша вторая строка ввода не соответствует тому же формату, что и другие, поэтому вывод отличается.