# Blog import script: scrapes posts from the listing pages of an existing blog
# with Hpricot and re-publishes each one to a WordPress blog by posting Atom
# entries to its wp-app.php endpoint.
require 'rubygems'

# Standard library
require 'cgi'
require 'date'
require 'net/http'
require 'open-uri'

# Gems
require 'hpricot'
require 'atom/entry'
require 'atom/collection'
# --- Import settings ---------------------------------------------------------

# File name for a list of URLs to import (not referenced by the visible script).
urls_to_import = "urls.txt"

# Destination WordPress blog and its wp-app.php endpoint.
wp_blog_host = "livinglife.sweetperceptions.com"
wp_blog_uri  = "http://#{wp_blog_host}"
wp_base      = "#{wp_blog_uri}/wp-app.php"

# Credentials handed to the Atom HTTP client when posting entries.
wp_blog_username = "myusername"
wp_blog_password = "mypassword"

# Source blog and the range of listing pages to crawl.
your_blog_source = "http://sweetperceptions.i.ph"
which_pages      = 1..19

# Author name => account details (not referenced by the visible script).
authors = {
  'Maricris Nonato' => { 'user' => 'myusername', 'password' => 'mypassword' }
}

# Category names that a source tag may match verbatim.
registered_categories = [
  "About me",
  "Artistry",
  "Cool Finds",
  "Dreams",
  "Events",
  "Health and Beauty",
  "Horoscope",
  "Living Life",
  "Meme",
  "Movies",
  "Music",
  "Notes",
  "Pet Love",
  "Quotes",
  "Random thoughts",
  "Stories to share",
  "Techie",
  "Travel"
]

# Category name => source tags that should be filed under that category.
# Matching against these lists is exact and case-sensitive.
synonym_categories = {
  "About me"          => %w[me],
  "Artistry"          => %w[poem],
  "Cool Finds"        => %w[cool],
  "Dreams"            => %w[dream dreams],
  # Not a %w list because "New year" contains a space.
  "Events"            => ["event", "bday", "birthday", "Christmas", "New year", "new-year", "celebration"],
  "Health and Beauty" => %w[health sickness headache fever cancer],
  "Horoscope"         => %w[cookie fortune horoscope astrology psych],
  "Living Life"       => %w[life kalokohan],
  "Meme"              => %w[meme],
  "Movies"            => %w[hollywood movie movies movie-lines happy-feet],
  "Music"             => %w[song songs singer music ost],
  "Notes"             => %w[notes],
  "Pet Love"          => %w[pet cat dog animal animals pets],
  "Quotes"            => %w[quote quotes],
  "Random thoughts"   => %w[thought thoughts think logic],
  "Stories to share"  => %w[story stories adventure],
  "Techie"            => %w[tech techie work web2.0 development software online skype pc],
  "Travel"            => %w[philippines travel province]
}
# Crawl every listing page of the source blog and gather the href of each
# post-title link found on it (one array of links per page).
urls = which_pages.map do |page_number|
  listing = Hpricot(open("#{your_blog_source}/page/#{page_number}/"))
  (listing/"h3[@class='entrytitle']/a").map { |link| link['href'] }
end
# Merge the per-page arrays into one list; links without an href yield nil
# and are dropped here.
urls = urls.flatten.compact
# Import loop: fetch each post page, pull out its title, date, excerpt, tags
# and body, then publish it to the WordPress blog as an Atom entry.
urls.each do |target|
  doc = Hpricot(open(target))

  # The post id is the last dash-separated piece of the wrapper div's id.
  # Without it the post body cannot be located, so report and skip the page
  # instead of aborting the whole import.
  blog_div = doc.at("div[@class='blog']")
  if blog_div.nil? || blog_div['id'].nil?
    warn "Skipped URL: #{target}, no div[@class='blog'] id found"
    next
  end
  entry_id = "postentry-#{blog_div['id'].split('-').last}"

  title = CGI.unescapeHTML((doc/"div/h3[@class='entrytitle']/a").inner_html.strip).gsub(/\r\n/, '')
  author_name = "Chris"

  # Publication stamp: the date comes from the date span, the time from the
  # first h:mm:ss match inside the meta block.
  timestr = (doc/"div[@class='meta-post']").inner_html[/\d+:\d\d:\d\d/]
  datestr = (doc/"div/span[@class='date']").inner_html.strip.gsub(/\r\n/, '')
  # timestr is nil when the meta block has no time; fall back to the date alone
  # rather than raising on String + nil.
  datestr = [datestr, timestr].compact.join(' ')
  # The UTC offset is emitted as a literal "-0500", whatever was parsed.
  datestr = DateTime.parse(datestr).strftime('%a, %-d %b %Y %T -0500')

  hExcerpt = (doc/"div[@class='entry_summary']").inner_html.gsub(/\r\n/, '')

  # Map source tags to categories: exact category names first, then every
  # category that lists one of the tags as a synonym.
  source_tags = (doc/"div[@class='tag-list']/a").map { |link| link.inner_html }
  direct_matches = source_tags.select { |tag| registered_categories.include?(tag) }
  synonym_matches = synonym_categories.keys.select do |category|
    source_tags.any? { |tag| synonym_categories[category].include?(tag) }
  end
  tags = (direct_matches + synonym_matches).uniq.join(',')

  # Strip the elements already captured above so that only the post body is
  # left inside the entry container, then cut out the ad-section markers.
  (doc/"##{entry_id}/h3").remove
  (doc/"##{entry_id}/span[@class='date']").remove
  (doc/"##{entry_id}/div[@class='tag-list']").remove
  (doc/"##{entry_id}/div[@class='meta-post']").remove
  content = (doc/"##{entry_id}").inner_html.gsub("\n <!-- google_ad_section_start -->\n \n <!-- google_ad_section_end -->\n \n <!-- google_ad_section_start -->\n",'').gsub("\n <!-- google_ad_section_end -->\n \n",'')

  # Kept in its own variable: reusing `author_name` for the Atom::Author
  # object would assign the object to its own name and lose the string.
  author = Atom::Author.new
  author.name = author_name
  author.uri = wp_blog_uri

  entry = Atom::Entry.new
  entry.title = title
  entry.summary = hExcerpt
  entry.content = content
  entry.content.type = "html"
  entry.published = datestr
  entry.updated = datestr
  entry.tag_with(tags, ',')
  entry.authors << author

  # Post the entry to the blog's posts collection using basic auth.
  req = Atom::HTTP.new
  req.user = wp_blog_username
  req.pass = wp_blog_password
  req.always_auth = :basic
  c = Atom::Collection.new(wp_base + "/posts", req)
  res = c.post! entry
  puts "Imported URL: #{target}, at #{datestr}, #{res.message}\n"
end