# Create the first sample list of countries, one name per line.
# The two spaces inside "Burkina  Faso" are part of the data.
cat > countries.txt <<'EOF'
France
Canada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zealand
EOF


# Create the second sample list of countries, one name per line.
printf '%s\n' Germany Austria Poland > countries_1.txt


# Create the example web log: one TAB-separated record per line
# (timestamp, IP address, requested page).
printf '%s\t%s\t%s\n' \
  1618391758 192.168.0.140 index.html \
  1618391759 192.168.0.120 my_test.html \
  1618391762 192.168.0.140 login.html \
  1618391765 192.168.0.140 bla.html \
  1618391771 192.168.0.10 blub.html \
  > logfile-example.log


# Create the lookup table: IP address and host name, TAB-separated.
printf '%s\t%s\n' \
  192.168.0.10 sokrates \
  192.168.0.120 aristoteles \
  192.168.0.140 platon \
  > rechnernamen.txt


# Concatenate both country lists; the process substitution <(echo ...)
# supplies a separator line between the two files.
cat countries.txt <(echo ---------) countries_1.txt

# The result could of course also be written to a new file:
# cat countries.txt countries_1.txt > merged_countries.txt

France
Canada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zealand
---------
Germany
Austria
Poland


# Show the example log file (created above)
cat logfile-example.log
# columns: timestamp  IP  web page

1618391758	192.168.0.140	index.html
1618391759	192.168.0.120	my_test.html
1618391762	192.168.0.140	login.html
1618391765	192.168.0.140	bla.html
1618391771	192.168.0.10	blub.html


# Note:
# the timestamps can be converted into a human-readable form with `date`:
date -d @1618391759

Mi 14. Apr 11:15:59 CEST 2021


# Extract the 1st and 3rd field (column) with cut.
# The default field delimiter is TAB;
#   -d selects a different delimiter,
#   -f lists the fields to extract.
# The options are placed before the file operand: options after operands are
# only accepted by tools that permute their arguments (GNU); other
# implementations may treat "-d" as a file name.
cut -d $'\t' -f1,3 logfile-example.log

1618391758	index.html
1618391759	my_test.html
1618391762	login.html
1618391765	bla.html
1618391771	blub.html


# Sort by the second field (the second column).
# Note: -k2 lets the sort key run from field 2 to the end of the line.
sort -t $'\t' -k2 logfile-example.log

1618391771	192.168.0.10	blub.html
1618391759	192.168.0.120	my_test.html
1618391765	192.168.0.140	bla.html
1618391758	192.168.0.140	index.html
1618391762	192.168.0.140	login.html


# Join the log (field 2 = IP) with rechnernamen.txt (field 1 = IP) on the IP;
# the log is sorted on the fly because join expects sorted input.
join -1 2 -2 1 -t $'\t' <(sort -t $'\t' -k2 logfile-example.log) rechnernamen.txt

192.168.0.10	1618391771	blub.html	sokrates
192.168.0.120	1618391759	my_test.html	aristoteles
192.168.0.140	1618391765	bla.html	platon
192.168.0.140	1618391758	index.html	platon
192.168.0.140	1618391762	login.html	platon


# What happens when the first input is not sorted on the join field:
join -1 2 -2 1 -t $'\t' logfile-example.log rechnernamen.txt

join: logfile-example.log:2: is not sorted: 1618391759	192.168.0.120	my_test.html
192.168.0.140	1618391758	index.html	platon


# Translate every space into a TAB and every upper-case letter into the
# corresponding lower-case letter (tr reads from standard input only).
tr ' [:upper:]' '\t[:lower:]' < countries.txt

france
canada
burkina		faso
democratic	republic	of	the	congo
russia
new	zealand


# Here the fields are separated by a varying number of spaces
# (the file also ends with one empty line):
cat > logfile-example_.log <<'EOF'
1618391758 192.168.0.140 index.html
1618391759   192.168.0.120  my_test.html
1618391762  192.168.0.140  login.html

EOF


# With several spaces between the fields cut selects the wrong
# (possibly empty) fields, because every single space is a delimiter.
# The options are placed before the file operand: options after operands are
# only accepted by tools that permute their arguments (GNU).
cut -d' ' -f1,3 logfile-example_.log

1618391758 index.html
1618391759 
1618391762 192.168.0.140


# Squeeze each run of repeated whitespace characters into a single character,
# so that cut sees exactly one space between two fields.
# Note: [:space:] also covers the newline, so empty lines disappear as well.
tr -s '[:space:]' < logfile-example_.log  | cut -d' ' -f1,3

1618391758 index.html
1618391759 my_test.html
1618391762 login.html


# Create a file that contains duplicates:

# countries.txt twice for the duplicates, plus one extra "France";
# tee prints the stream and writes it to duplicate_countries at the same time.
cat countries.txt countries_1.txt countries.txt <(echo France) | tee duplicate_countries

France
Canada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zealand
Germany
Austria
Poland
France
Canada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zealand
France


# Count how often each line occurs: uniq only merges adjacent equal lines,
# hence the sort first; -c prefixes every line with its number of occurrences.
sort duplicate_countries | uniq -c

      1 Austria
      2 Burkina  Faso
      2 Canada
      2 Democratic Republic of the Congo
      3 France
      1 Germany
      2 New Zealand
      1 Poland
      2 Russia


# e.g. with number sequences
seq 1 5


# paste puts the corresponding lines of all inputs side by side
# (TAB-separated), i.e. every input becomes one column.
paste <(seq 1 5) <(seq 11 15) <(seq 21 25)

1	11	21
2	12	22
3	13	23
4	14	24
5	15	25


# row-wise is possible too: with -s every input becomes one output line
paste -s <(seq 1 5) <(seq 11 15) <(seq 21 25)

1	2	3	4	5
11	12	13	14	15
21	22	23	24	25


# Join the numbers 1..5 with "+" into a single expression ...
# (paste already prints the line; wrapping it in echo $(...) is unnecessary)
paste -sd+ <(seq 1 5)
# ... and let bc evaluate that expression.
paste -sd+ <(seq 1 5) | bc

1+2+3+4+5
15


# Reminder: content of the example log file
cat logfile-example.log

1618391758	192.168.0.140	index.html
1618391759	192.168.0.120	my_test.html
1618391762	192.168.0.140	login.html
1618391765	192.168.0.140	bla.html
1618391771	192.168.0.10	blub.html


# Print the one-line manual description of nl, an empty line,
# and then countries.txt with line numbers.
whatis nl 
echo
nl countries.txt

nl (1)               - number lines of files
nl (1posix)          - line numbering filter

     1	France
     2	Canada
     3	Burkina  Faso
     4	Democratic Republic of the Congo
     5	Russia
     6	New Zealand


# Print the one-line manual description of column, then the first four
# lines of /etc/passwd as they are ...
whatis column
echo
head -4 /etc/passwd
echo
echo columnized with column:
# ... and the same lines aligned as a table (-t), split at ":" (-s :).
head -4 /etc/passwd | column -t -s :

column (1)           - columnate lists

root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin

columnized with column:
root    x  0  0  root    /root      /bin/bash
daemon  x  1  1  daemon  /usr/sbin  /usr/sbin/nologin
bin     x  2  2  bin     /bin       /usr/sbin/nologin
sys     x  3  3  sys     /dev       /usr/sbin/nologin


# Replace whitespace with an underscore `_`;
# a run of several whitespace characters becomes a single `_`
# (the + in the pattern matches the whole run at once).
# sed reads the file itself, no `cat ... |` needed.
sed -E 's/[[:space:]]+/_/g' countries.txt

France
Canada
Burkina_Faso
Democratic_Republic_of_the_Congo
Russia
New_Zealand


# Example: additionally write the error output of a command to a log file.
# The log file name should contain the date (without spaces!);
# echo "..." is used here only to make the resulting command visible.
echo "command 2> $(date | sed -E 's/[[:space:]]+/_/g')_command_errors.log"

# a better choice for the name is the ISO 8601 format of date:
echo "command 2> $(date -Iseconds)_command_errors.log"

command 2> Do_12._Mai_13:50:12_CEST_2022_command_errors.log
command 2> 2022-05-12T13:50:12+02:00_command_errors.log


# Use the matched text in the replacement: \1 stands for whatever the first
# group (a.a) matched, here it is wrapped in an additional "a" on both sides.
# sed reads the file itself, no `cat ... |` needed.
sed -E 's/(a.a)/a\1a/g' countries.txt

France
Caanaada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zeaalaand


cat text_with_urls.txt
# E.g. copy the text below into a file in order to test your solution

Es gibt im Internet viele Adressen, wie z.B. http://wikipedia.de oder http://christianherta.de/ und so weiter.
In dieser Zeile steht keine URL.
Aber es ist auch eine Angabe mit Portnummer erlaubt, wie http://localhost:8080/mein/pfad oder mit einem Usernamen http://benutzername@www.mydomain.de/seite/beispiel.php für 
Durch Textmarken kann man direkt auf Bereiche springen, wie z.B. http://www.mydomain.de/seite//beispiel.php#textmarke
Wieder keine Zeile ohne URL.
Wieder keine Zeile ohne URL. Dies soll bei der Extraktion ignoriert werden (auch keine Leerzeile!)
Statt Servernamen, die per DNS aufgelöst werden, kann aber auch direkt eine IP Adresse stehen: https://192.168.0.1/seite/beispiel.php?vname=christian&ort=berlin Bei dieser Adresse werden noch URL Parameter am Ende übergeben (http get). Der Query String beginnt dabei mit einem Fragezeichen "?" und die Parameter haben die Form name=wert und sind mit "&" getrennt.
Wieder keine Zeile ohne URL.
Diese soll bei der Ausgabe ignoriert werden (auch keine Leerzeile!).


line="Das ist eine IPv4 Adresse 192.168.0.5 in einem privaten Netz."

# Extract the IPv4 address.
# "$line" is quoted so that the shell neither word-splits nor glob-expands
# its content; printf prints it unchanged.
# -n together with the p flag prints only lines on which the substitution
# succeeded. The pattern requires a non-digit character directly before and
# after the address, so an address at the very start or end of the line is
# not matched.
printf '%s\n' "$line" | sed -n -E 's/.*[^[:digit:]](([0-9]{1,3}\.){3}[0-9]{1,3})[^[:digit:]].*/\1/p'
# Make sure you understand what the regex means!

192.168.0.5


# Replace every "a" with "e", except on lines that contain an upper-case "C"
# (the address /C/ selects those lines, the ! negates the selection).
sed -E '/C/! s/a/e/g' countries.txt

Frence
Canada
Burkine  Feso
Democratic Republic of the Congo
Russie
New Zeelend


# Print the 2nd and 3rd field of every log line, separated by a TAB
# (OFS is the separator awk inserts for the comma in print).
awk -v OFS='\t' '{ print $2, $3 }' logfile-example.log

192.168.0.140	index.html
192.168.0.120	my_test.html
192.168.0.140	login.html
192.168.0.140	bla.html
192.168.0.10	blub.html


# Reminder
cat logfile-example.log # log file of the page requests
# timestamp  IP-of-the-web-server  requested-page
echo
cat rechnernamen.txt # IP address  web server name

1618391758	192.168.0.140	index.html
1618391759	192.168.0.120	my_test.html
1618391762	192.168.0.140	login.html
1618391765	192.168.0.140	bla.html
1618391771	192.168.0.10	blub.html

192.168.0.10	sokrates
192.168.0.120	aristoteles
192.168.0.140	platon


# Join the log with the host names on the IP address, order the result by
# time and print "date<TAB>host name".
#   -k2,2   limits the sort key to the IP field, i.e. exactly the field that
#           join compares (a plain -k2 would extend the key to the line end).
#   -k2,2n  orders the joined lines numerically by the timestamp (field 2).
#   -F '\t' makes awk split at TABs only, matching the TAB-separated data.
# strftime is not part of POSIX awk (it is available in gawk).
join -1 2 -2 1 -t $'\t' <(sort -t $'\t' -k2,2 logfile-example.log) rechnernamen.txt \
  | sort -t $'\t' -k2,2n \
  | awk -F '\t' '{ print strftime("%m/%d/%Y %H:%M:%S", $2) "\t" $4 }'

04/14/2021 11:15:58	platon
04/14/2021 11:15:59	aristoteles
04/14/2021 11:16:02	platon
04/14/2021 11:16:05	platon
04/14/2021 11:16:11	sokrates


# Show the file with the duplicated country names
cat duplicate_countries

France
Canada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zealand
Germany
Austria
Poland
France
Canada
Burkina  Faso
Democratic Republic of the Congo
Russia
New Zealand
France


# Create a modified country list for the comparison with countries.txt.
# "Burkina  Faso" is spelled with two spaces, exactly as in countries.txt;
# with a single space diff would report that line as changed as well.
echo "Canada
Burkina  Faso
France
Democratic Republic of the Kongo
Austria
New Zealand
Germany
Switzerland" > countries_2.txt


# Compare the two lists line by line; "<" marks lines of the first file,
# ">" marks lines of the second file.
diff countries.txt countries_2.txt

1d0
< France
4,5c3,5
< Democratic Republic of the Congo
< Russia
---
> France
> Democratic Republic of the Kongo
> Austria
6a7,8
> Germany
> Switzerland


# Create a small three-line sample text.
printf '%s\n' \
  'Hallo Welt' \
  'hallo welt sind Grüße' \
  'an die Welt' \
  > greetings


# Word frequency list of the ls manual page:
#   1. turn every non-letter into a space,
#   2. turn each run of spaces/newlines into one newline (one word per line),
#   3. lower-case everything,
#   4. count equal words, order by descending count and number the lines.
man ls |
  sed 's/[^[:alpha:]]/ /g' |
  tr -s ' \n' '\n\n' |
  tr 'A-Z' 'a-z' |
  sort |
  uniq -c |
  sort -nr |
  nl

     1	     20 sort
     2	     20 of
     3	     19 by
     4	     17 with
     5	     17 the
     6	     16 to
     7	     16 list
     8	     15 file
     9	     15 and
    10	     14 time
    11	     14 l
    12	     13 style
    13	     13 is
    14	     12 or
    15	     12 not
    16	     12 ls
    17	     12 a
    18	     10 size
    19	     10 format
    20	     10 entries
    21	     10 do
    22	      9 word
    23	      9 show
    24	      9 print
    25	      9 if
    26	      8 g
    27	      8 for
    28	      8 default
    29	      7 use
    30	      6 when
    31	      6 shell
    32	      6 output
    33	      6 names
    34	      6 m
    35	      6 line
    36	      6 in
    37	      6 group
    38	      6 gnu
    39	      6 first
    40	      6 each
    41	      6 command
    42	      6 color
    43	      6 c
    44	      6 access
    45	      5 x
    46	      5 version
    47	      5 u
    48	      5 t
    49	      5 name
    50	      5 long
    51	      5 like
    52	      5 instead
    53	      5 information
    54	      5 indicator
    55	      5 entry
    56	      5 e
    57	      5 directory
    58	      5 coreutils
    59	      5 but
    60	      4 software
    61	      4 s
    62	      4 pattern
    63	      4 org
    64	      4 none
    65	      4 newest
    66	      4 link
    67	      4 k
    68	      4 iso
    69	      4 implied
    70	      4 http
    71	      4 full
    72	      4 files
    73	      4 escape
    74	      4 directories
    75	      4 ctime
    76	      4 cols
    77	      4 can
    78	      4 append
    79	      4 always
    80	      3 width
    81	      3 type
    82	      3 symbolic
    83	      3 status
    84	      3 sizes
    85	      3 quoting
    86	      3 q
    87	      3 powers
    88	      3 p
    89	      3 nongraphic
    90	      3 no
    91	      3 never
    92	      3 n
    93	      3 modification
    94	      3 locale
    95	      3 ignore
    96	      3 help
    97	      3 free
    98	      3 f
    99	      3 exit
   100	      3 dereference
   101	      3 characters
   102	      3 be
   103	      3 b
   104	      3 auto
   105	      3 author
   106	      3 as
   107	      3 are
   108	      2 z
   109	      2 www
   110	      2 v
   111	      2 using
   112	      2 user
   113	      2 units
   114	      2 this
   115	      2 terminal
   116	      2 slash
   117	      2 set
   118	      2 see
   119	      2 reverse
   120	      2 recent
   121	      2 readable
   122	      2 r
   123	      2 posix
   124	      2 otherwise
   125	      2 order
   126	      2 options
   127	      2 option
   128	      2 only
   129	      2 one
   130	      2 omitted
   131	      2 numeric
   132	      2 matching
   133	      2 mandatory
   134	      2 lt
   135	      2 literal
   136	      2 listing
   137	      2 likewise
   138	      2 it
   139	      2 info
   140	      2 i
   141	      2 hyperlink
   142	      2 human
   143	      2 hide
   144	      2 h
   145	      2 gpl
   146	      2 follow
   147	      2 extension
   148	      2 dired
   149	      2 d
   150	      2 copyright
   151	      2 control
   152	      2 context
   153	      2 contents
   154	      2 columns
   155	      2 classify
   156	      2 chars
   157	      2 change
   158	      2 cannot
   159	      2 bugs
   160	      2 blocks
   161	      2 block
   162	      2 below
   163	      2 before
   164	      2 at
   165	      2 argument
   166	      2 any
   167	      2 also
   168	      2 alphabetically
   169	      2 all
   170	      1 you
   171	      1 y
   172	      1 written
   173	      1 without
   174	      1 within
   175	      1 while
   176	      1 warranty
   177	      1 w
   178	      1 via
   179	      1 vertical
   180	      1 verbose
   181	      1 variable
   182	      1 usage
   183	      1 unless
   184	      1 unit
   185	      1 uid
   186	      1 types
   187	      1 trouble
   188	      1 translationproject
   189	      1 translation
   190	      1 too
   191	      1 tion
   192	      1 times
   193	      1 there
   194	      1 then
   195	      1 themselves
   196	      1 them
   197	      1 their
   198	      1 that
   199	      1 than
   200	      1 text
   201	      1 team
   202	      1 takes
   203	      1 tabsize
   204	      1 tab
   205	      1 synopsis
   206	      1 symlink
   207	      1 subdirectory
   208	      1 subdirectories
   209	      1 stops
   210	      1 starting
   211	      1 standard
   212	      1 stallman
   213	      1 specified
   214	      1 speci
   215	      1 sorting
   216	      1 single
   217	      1 si
   218	      1 showing
   219	      1 short
   220	      1 settings
   221	      1 serious
   222	      1 separated
   223	      1 security
   224	      1 scale
   225	      1 richard
   226	      1 reporting
   227	      1 report
   228	      1 references
   229	      1 redistribute
   230	      1 recursively
   231	      1 recursive
   232	      1 rather
   233	      1 quotes
   234	      1 quote
   235	      1 program
   236	      1 problems
   237	      1 prints
   238	      1 printing
   239	      1 pre
   240	      1 points
   241	      1 permitted
   242	      1 per
   243	      1 owner
   244	      1 overridden
   245	      1 outside
   246	      1 optional
   247	      1 online
   248	      1 on
   249	      1 ok
   250	      1 o
   251	      1 numbers
   252	      1 number
   253	      1 nor
   254	      1 non
   255	      1 newline
   256	      1 natural
   257	      1 more
   258	      1 mode
   259	      1 minor
   260	      1 means
   261	      1 mb
   262	      1 mackenzie
   263	      1 locally
   264	      1 listed
   265	      1 links
   266	      1 lines
   267	      1 limit
   268	      1 licenses
   269	      1 license
   270	      1 law
   271	      1 later
   272	      1 last
   273	      1 largest
   274	      1 kibibytes
   275	      1 key
   276	      1 kb
   277	      1 january
   278	      1 itself
   279	      1 invocation
   280	      1 interpreted
   281	      1 integer
   282	      1 inode
   283	      1 informa
   284	      1 index
   285	      1 inc
   286	      1 ids
   287	      1 html
   288	      1 horizontal
   289	      1 grouping
   290	      1 gplv
   291	      1 gid
   292	      1 generate
   293	      1 foundation
   294	      1 fixed
   295	      1 fill
   296	      1 fied
   297	      1 extent
   298	      1 except
   299	      1 example
   300	      1 escapes
   301	      1 environment
   302	      1 ending
   303	      1 enclose
   304	      1 enable
   305	      1 emits
   306	      1 emacs
   307	      1 effect
   308	      1 double
   309	      1 don
   310	      1 documentation
   311	      1 distinguish
   312	      1 display
   313	      1 disk
   314	      1 disables
   315	      1 disabled
   316	      1 disable
   317	      1 dircolors
   318	      1 dir
   319	      1 designed
   320	      1 description
   321	      1 david
   322	      1 date
   323	      1 current
   324	      1 connected
   325	      1 commas
   326	      1 commands
   327	      1 comma
   328	      1 column
   329	      1 colors
   330	      1 colorize
   331	      1 codes
   332	      1 cftuvsux
   333	      1 bytes
   334	      1 byte
   335	      1 both
   336	      1 backups
   337	      1 avoid
   338	      1 available
   339	      1 augmented
   340	      1 au
   341	      1 atime
   342	      1 assume
   343	      1 arguments
   344	      1 applies
   345	      1 an
   346	      1 almost
   347	      1 allocated
   348	      1 across
   349	      1 about

Änderung	Beschreibung
`l1dl2`	Lösche (delete) die Zeilen `l1` der ersten Datei; in der zweiten Datei würden sie nach Zeile `l2` stehen.
`l1cl2`	Ersetze (bzw. ändere, change) die Zeilen `l1` der ersten Datei mit den Zeilen `l2` der zweiten Datei.
`l1al2`	Füge (add) nach Zeile `l1` der ersten Datei die Zeilen `l2` der zweiten Datei ein.

Setup¶

Text(strom)bearbeitung und -extraktion¶

Inhaltsverzeichnis¶

Werkzeuge zur Textbearbeitung¶

`cat`¶

`cut`¶

`sort`¶

`join`¶

`tr`¶

`tee`¶

`uniq`¶

`paste`¶

Aufgabe (für Experten)¶

`nl`¶

`column`¶

Stream-Editor `sed`¶

Gefundene Muster in Ersetzung nutzen¶

Aufgabe¶

Extraktion¶

Textprozessierungssprache `awk`¶

Übungsaufgaben¶

Aufgabe¶

Textvergleiche¶

`diff`¶

Gängige Datenaustauschformate¶

Übungen¶

Aufgabe¶

Aufgabe¶

Aufgabe¶

Aufgabe¶

Setup¶

Text(strom)bearbeitung und -extraktion¶

Inhaltsverzeichnis¶

Werkzeuge zur Textbearbeitung¶

cat¶

cut¶

sort¶

join¶

tr¶

tee¶

uniq¶

paste¶

Aufgabe (für Experten)¶

nl¶

column¶

Stream-Editor sed¶

Gefundene Muster in Ersetzung nutzen¶

Aufgabe¶

Extraktion¶

Textprozessierungssprache awk¶

Übungsaufgaben¶

Aufgabe¶

Textvergleiche¶

diff¶

Gängige Datenaustauschformate¶

Übungen¶

Aufgabe¶

Aufgabe¶

Aufgabe¶

Aufgabe¶

`cat`¶

`cut`¶

`sort`¶

`join`¶

`tr`¶

`tee`¶

`uniq`¶

`paste`¶

`nl`¶

`column`¶

Stream-Editor `sed`¶

Textprozessierungssprache `awk`¶

`diff`¶