Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

Qiita 自然言語システムAdvent Calendar 2024

Day 9

awk, tr 用 shell script ttowc.sh docker(187)

Posted at 2024-11-10


tr, awkの処理を自動化することを検討。

失敗の記録:文字列処理スクリプトの場合 docker(188)

PDFtoTEXTでは単語分割がうまくできなかった。Acrobat ReaderでTXTファイルとして出力した方が精度が高かった。

# https://news.mynavi.jp/article/bashonwindows-17/
# https://qiita.com/kaizen_nagoya/items/319672853519990cee42

 for File in *; do
	    tr 'A-Z' 'a-z' < ../text/${File}.txt > ../text/${File}.smt
	    awk -f ../gs.awk ../txt/${File}.smt > ../wc/${File}.wc
 # chmod +x ttowc.sh 
 root@bbf22e8ed49c:/rmp/llm# ./ttowc.sh
./ttowc.sh: line 6: ../text/2101.00678v1.txt.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2101.00678v1.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2309.14322v2.txt.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2309.14322v2.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2311.16502v4.txt.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2311.16502v4.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1.txt.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1.wc.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1.wc.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1L.txt.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1L.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1s.txt.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1s.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/text.txt: No such file or directory
./ttowc.sh: line 7: ../wc/text.wc: No such file or directory
./ttowc.sh: line 6: ../text/ttowc.sh.txt: No such file or directory
./ttowc.sh: line 7: ../wc/ttowc.sh.wc: No such file or directory
./ttowc.sh: line 6: ../text/wc.txt: No such file or directory
./ttowc.sh: line 7: ../wc/wc.wc: No such file or directory
./ttowc.sh: line 6: ../text/wc.awk.txt: No such file or directory
./ttowc.sh: line 7: ../wc/wc.awk.wc: No such file or directory
# https://news.mynavi.jp/article/bashonwindows-17/
# https://qiita.com/kaizen_nagoya/items/319672853519990cee42

 for File in *; do
	    tr 'A-Z' 'a-z' < ../text/${File} > ../text/${File}.smt
	    awk -f ../gs.awk ../txt/${File}.smt > ../wc/${File}.wc
root@bbf22e8ed49c:/rmp/llm# ./ttowc.sh
./ttowc.sh: line 6: ../text/2101.00678v1.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2101.00678v1.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2309.14322v2.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2309.14322v2.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2311.16502v4.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2311.16502v4.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1.wc: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1.wc.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1L.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1L.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/2408.16293v1s.txt: No such file or directory
./ttowc.sh: line 7: ../wc/2408.16293v1s.txt.wc: No such file or directory
./ttowc.sh: line 6: ../text/text: No such file or directory
./ttowc.sh: line 7: ../wc/text.wc: No such file or directory
./ttowc.sh: line 6: ../text/ttowc.sh: No such file or directory
./ttowc.sh: line 7: ../wc/ttowc.sh.wc: No such file or directory
./ttowc.sh: line 6: ../text/wc: No such file or directory
./ttowc.sh: line 7: ../wc/wc.wc: No such file or directory
./ttowc.sh: line 6: ../text/wc.awk: No such file or directory
./ttowc.sh: line 7: ../wc/wc.awk.wc: No such file or directory
# https://news.mynavi.jp/article/bashonwindows-17/
# https://qiita.com/kaizen_nagoya/items/319672853519990cee42

 for File in *; do
	    tr 'A-Z' 'a-z' < ./text/${File} > ./text/${File}.smt
	    awk -f ../gs.awk ./txt/${File}.smt > ./wc/${File}.wc
 root@bbf22e8ed49c:/rmp/llm# ./ttowc.sh
awk: cannot open "../gs.awk" (No such file or directory)
awk: cannot open "../gs.awk" (No such file or directory)
awk: cannot open "../gs.awk" (No such file or directory)
awk: cannot open "../gs.awk" (No such file or directory)
./ttowc.sh: line 6: ./text/2408.16293v1.wc: No such file or directory
awk: cannot open "../gs.awk" (No such file or directory)
awk: cannot open "../gs.awk" (No such file or directory)
awk: cannot open "../gs.awk" (No such file or directory)
./ttowc.sh: line 6: ./text/text: No such file or directory
awk: cannot open "../gs.awk" (No such file or directory)
./ttowc.sh: line 6: ./text/ttowc.sh: No such file or directory
awk: cannot open "../gs.awk" (No such file or directory)
./ttowc.sh: line 6: ./text/wc: No such file or directory
awk: cannot open "../gs.awk" (No such file or directory)
./ttowc.sh: line 6: ./text/wc.awk: No such file or directory
awk: cannot open "../gs.awk" (No such file or directory)
# https://news.mynavi.jp/article/bashonwindows-17/
# https://qiita.com/kaizen_nagoya/items/319672853519990cee42

 for File in *; do
	    tr 'A-Z' 'a-z' < ./text/${File} > ./text/${File}.smt
	    awk -f wc.awk ./txt/${File}.smt > ./wc/${File}.wc
 root@bbf22e8ed49c:/rmp/llm# ./ttowc.sh
awk: cannot open "./txt/2101.00678v1.txt.smt" (No such file or directory)
awk: cannot open "./txt/2309.14322v2.txt.smt" (No such file or directory)
awk: cannot open "./txt/2311.16502v4.txt.smt" (No such file or directory)
awk: cannot open "./txt/2408.16293v1.txt.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/2408.16293v1.wc: No such file or directory
awk: cannot open "./txt/2408.16293v1.wc.smt" (No such file or directory)
awk: cannot open "./txt/2408.16293v1L.txt.smt" (No such file or directory)
awk: cannot open "./txt/2408.16293v1s.txt.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/text: No such file or directory
awk: cannot open "./txt/text.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/ttowc.sh: No such file or directory
awk: cannot open "./txt/ttowc.sh.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/wc: No such file or directory
awk: cannot open "./txt/wc.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/wc.awk: No such file or directory
awk: cannot open "./txt/wc.awk.smt" (No such file or directory)
# mkdir small
# https://news.mynavi.jp/article/bashonwindows-17/
# https://qiita.com/kaizen_nagoya/items/319672853519990cee42

 for File in *; do
	    tr 'A-Z' 'a-z' < ./text/${File} > ./small/${File}
	    awk -f wc.awk ./small/${File} > ./wc/${File}.wc



root@bbf22e8ed49c:/rmp/llm# ./ttowc.sh
./ttowc.sh: line 6: ./text/2408.16293v1.wc: No such file or directory
awk: cannot open "./small/2408.16293v1.wc.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/2408.16293v1L.txt: No such file or directory
awk: cannot open "./small/2408.16293v1L.txt.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/2408.16293v1s.txt: No such file or directory
awk: cannot open "./small/2408.16293v1s.txt.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/small: No such file or directory
awk: cannot open "./small/small.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/text: No such file or directory
awk: cannot open "./small/text.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/ttowc.sh: No such file or directory
awk: cannot open "./small/ttowc.sh.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/wc: No such file or directory
awk: cannot open "./small/wc.smt" (No such file or directory)
./ttowc.sh: line 6: ./text/wc.awk: No such file or directory
awk: cannot open "./small/wc.awk.smt" (No such file or directory)

# https://news.mynavi.jp/article/bashonwindows-17/
# https://qiita.com/kaizen_nagoya/items/319672853519990cee42

 for File in *; do
	    tr 'A-Z' 'a-z' < ./${File} > ../small/${File}
	    awk -f wc.awk ../small/${File} > ../wc/${File}.wc

root@bbf22e8ed49c:/rmp/llm# cd text
root@bbf22e8ed49c:/rmp/llm/text# ../ttowc.sh
../ttowc.sh: line 6: ./text/2101.00678v1.txt: No such file or directory
../ttowc.sh: line 7: ./wc/2101.00678v1.txt.wc: No such file or directory
../ttowc.sh: line 6: ./text/2309.14322v2.txt: No such file or directory
../ttowc.sh: line 7: ./wc/2309.14322v2.txt.wc: No such file or directory
../ttowc.sh: line 6: ./text/2311.16502v4.txt: No such file or directory
../ttowc.sh: line 7: ./wc/2311.16502v4.txt.wc: No such file or directory
../ttowc.sh: line 6: ./text/2408.16293v1.txt: No such file or directory
../ttowc.sh: line 7: ./wc/2408.16293v1.txt.wc: No such file or directory
../ttowc.sh: line 6: ./text/old: No such file or directory
../ttowc.sh: line 7: ./wc/old.wc: No such file or directory





for File in *.txt; do

    tr 'A-Z' 'a-z' < ./${File}.txt > ../small/${File}.smt
    awk -f wc.awk ../small/${File}.smt > ../wc/${File}.wc


root@bbf22e8ed49c:/rmp/llm/text# ./ttowc.sh
./ttowc.sh: line 7: ./2101.00678v1.txt.txt: No such file or directory
awk: cannot open "wc.awk" (No such file or directory)
./ttowc.sh: line 7: ./2309.14322v2.txt.txt: No such file or directory
awk: cannot open "wc.awk" (No such file or directory)
./ttowc.sh: line 7: ./2311.16502v4.txt.txt: No such file or directory
awk: cannot open "wc.awk" (No such file or directory)
./ttowc.sh: line 7: ./2408.16293v1.txt.txt: No such file or directory
awk: cannot open "wc.awk" (No such file or directory)

root@bbf22e8ed49c:/rmp/llm/text# ./ttowc.sh 
awk: cannot open "wc.awk" (No such file or directory)
awk: cannot open "wc.awk" (No such file or directory)
awk: cannot open "wc.awk" (No such file or directory)
awk: cannot open "wc.awk" (No such file or directory)
root@bbf22e8ed49c:/rmp/llm/text# cp ../wc.awk .
root@bbf22e8ed49c:/rmp/llm/text# ./ttowc.sh 



Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?