JustPaste.it

get_missing_figures.lua

http=require'socket.http'
-- Scans a range of forum topics page by page and, for every page whose HTML
-- contains the text "Attachments", renders that page to PDF with wkhtmltopdf.
-- The PDFs of one topic are then merged with pdfunite (or simply renamed when
-- there is only one) into "<topic>_extra_pictures.pdf".
-- Relies on the global `http` (socket.http) set up before this block.

-- Inclusive range of topic ids to scan.
local start_topic = 15000
local end_topic = 18000
-- URL fragments placed before the topic id and before the post offset.
local urlbit1 = "http://www.volvo300mania.com/uk/forum/viewtopic.php?t="
local urlbit2 = "&start="
-- Post offsets requested for each topic: 0, 15, 30, ... up to 15000.
local page_step = 15
local max_start = 15000

--- Run a shell command and report whether it succeeded.
-- os.execute does not raise when the command itself fails, so pcall alone
-- always looks like success; the returned status has to be inspected.
-- Lua 5.1 returns a numeric status (0 on success); Lua 5.2+ returns true on
-- success and nil/false otherwise.
-- @tparam string command shell command line to run
-- @treturn boolean true only when the command ran and reported success
local function run_command(command)
    local called_ok, status = pcall(os.execute, command)
    return called_ok and (status == true or status == 0)
end

--- Tell whether a file exists and holds at least one byte.
-- @tparam string path file to inspect
-- @treturn boolean true when the file can be opened and is not empty
local function file_has_content(path)
    local handle = io.open(path, "rb")
    if not handle then
        return false
    end
    local size = handle:seek("end")
    handle:close()
    return size ~= nil and size > 0
end

for topic = start_topic, end_topic do
    local old_url_text_length = 0
    local saved_pages = {}

    for start_page = 0, max_start, page_step do
        local url = urlbit1 .. topic .. urlbit2 .. start_page
        local url_text, statusCode = http.request(url)

        -- No body at all means the request itself failed; the second return
        -- value is then used as the error description rather than a status.
        if type(url_text) ~= "string" then
            print("Request failed for topic " .. topic .. ": " .. tostring(statusCode))
            break
        end
        if statusCode ~= 200 then
            print("Topic " .. topic .. " does not exist.")
            break
        end

        -- End-of-topic heuristic: a page whose length equals the previous
        -- page's length is treated as a repeat of the last page.
        local url_text_length = #url_text
        if start_page == 0 then
            print("New topic: " .. topic)
        elseif url_text_length == old_url_text_length then
            print("Reached end of this topic.")
            break
        end
        old_url_text_length = url_text_length

        -- Plain (non-pattern) search for the attachment marker.
        if url_text:find("Attachments", 1, true) then
            local newfile = topic .. "_" .. start_page .. ".pdf"
            print("Outlined attachment present, will save pages as screen view.")

            -- Drop any stale output so the existence check below reflects
            -- this run only.
            os.remove(newfile)
            local exited_ok = run_command('wkhtmltopdf "' .. url .. '" ' .. newfile)

            -- The later merge/rename step only needs the file, so the page
            -- counts as saved when a non-empty PDF was written.
            -- NOTE(review): wkhtmltopdf is believed to exit non-zero in some
            -- cases where it still writes a usable PDF (e.g. a page resource
            -- failing to load) - confirm before tightening this check.
            if file_has_content(newfile) then
                if not exited_ok then
                    print("Warning: wkhtmltopdf reported a problem for: " .. newfile)
                end
                print("Saved: " .. newfile)
                saved_pages[#saved_pages + 1] = newfile
            else
                print("Error converting topic: " .. topic .. " to pdf.")
            end
        end
    end

    if #saved_pages > 0 then
        -- Space-terminated list, usable both for display and as the leading
        -- arguments of the pdfunite command line.
        local saved_list = table.concat(saved_pages, " ") .. " "
        print("Saved pages: " .. saved_list)
        local combined_file = topic .. "_extra_pictures.pdf"

        if #saved_pages > 1 then
            local combined = run_command("pdfunite " .. saved_list .. combined_file)
            if combined and file_has_content(combined_file) then
                print("Combined as: " .. combined_file)
                -- Delete the per-page files only once the merge is known to
                -- have produced output.
                for _, page_file in ipairs(saved_pages) do
                    local removed, remove_err = os.remove(page_file)
                    if not removed then
                        print("Error removing " .. page_file .. ": " .. tostring(remove_err))
                    end
                end
                print("Removed originals: " .. saved_list)
            else
                print("Error combining pages for topic: " .. topic .. ", originals kept.")
            end
        else
            local renamed, rename_err = os.rename(saved_pages[1], combined_file)
            if renamed then
                print("Renamed original: " .. saved_list .. " to: " .. combined_file)
            else
                print("Error renaming " .. saved_pages[1] .. ": " .. tostring(rename_err))
            end
        end
    end
end