diff options
author | haturatu <taro@eyes4you.org> | 2024-10-09 01:36:19 +0900 |
---|---|---|
committer | haturatu <taro@eyes4you.org> | 2024-10-09 01:36:19 +0900 |
commit | 031b8014901bd101d92af148a841e9b9c8fb59f5 (patch) | |
tree | e284928681323b7bebf08f34bb15db8a11a20f5c | |
parent | 736e6f490c6a9555141188c78c6aa48782d3b5b6 (diff) |
fi
-rwxr-xr-x | geturl.rb | 4 | ||||
-rwxr-xr-x | md.rb | 4 |
2 files changed, 4 insertions, 4 deletions
```diff
--- a/geturl.rb
+++ b/geturl.rb
@@ -7,7 +7,7 @@ require 'nokogiri'
 require 'charlock_holmes'
 require 'unicode_utils'
 
-$FILE_PATH = './ok'
+$FILE_PATH = '/Your/URLs/list/file'
 $RESULT_FILE = 'Result'
 $OTHER_ERROR_FILE = 'Other'
 $CONCURRENCY = 10
@@ -17,7 +17,7 @@ def is_garbled?(text)
 end
 
 def clean_title(title)
-  title = title.chars.reject { |ch| UnicodeUtils.general_category(ch).start_with?('C') && !['(', ')', '[', ']', '{', '}', '【', '】', '「', '」', '(' ,')' ].include?(ch) }.join
+  title = title.chars.reject { |ch| UnicodeUtils.general_category(ch).start_with?('C') && !['(', ')', '[', ']', '{', '}', '【', '】', '【', '】', '「', '」', '(' ,')' ].include?(ch) }.join
   title = UnicodeUtils.nfkc(title)
   title = title.chars.select(&:valid_encoding?).join
   title.strip
--- a/md.rb
+++ b/md.rb
@@ -3,7 +3,7 @@
 def convert_to_markdown(input_file, output_file)
   content = File.read(input_file, encoding: 'utf-8')
 
-  pairs = content.scan(/URL: (.*?)\nTitle: (.*?)\n/m)
+  pairs = content.scan(/URL: (.*?)\nタイトル: (.*?)\n/m)
 
   File.open(output_file, 'w', encoding: 'utf-8') do |f|
     pairs.each do |url, title|
@@ -14,7 +14,7 @@ def convert_to_markdown(input_file, output_file)
       title = url.split('/')[-1] if title.strip.empty?
 
       # 特殊文字をエスケープ
-      title = title.gsub('[', '\\[').gsub(']', '\\]')
+      title = title.gsub(/[\[\]\(\)\{\}]/) { |m| "\\#{m}" }
 
       # md形式のリンクを作成
       markdown_link = "[#{title}](#{url})\n\n"
```