Thank you to anyone who has already donated - your generous donations helped make three months of treatment possible.

My brother Nate continues to fight stage IV Hodgkin's lymphoma. He's just 31, with a wife and baby girl. They have no active income (since he's been unable to return to work), no insurance, and cannot afford the treatment he needs. Nate and his family need your help. Please consider a donation; every dollar helps. Thanks.


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Web Crawler in AjSharp 
// http://ajlopez.wordpress.com/category/ajsharp/

// Build and launch agents

object WebCrawler
{
	// Creates one Downloader, Harvester, Resolver and Processor for the given
	// start address, connects them, and starts the crawl.
	//   url: absolute address of the start page, as a string
	//   fn:  callback handed to the Processor, which invokes it with
	//        (uri, content)
	sub Process(url, fn)
	{
		uri = new System.Uri(url);
		downloader = new Downloader();
		harvester = new Harvester();
		resolver = new Resolver(uri,5); // 5 is the maximum link depth accepted by the Resolver
		processor = new Processor(fn);
		
		// Wiring: Downloader -> Harvester -> Resolver -> Downloader,
		// and Downloader -> Processor.
		downloader.Harvester = harvester;
		downloader.Processor = processor;
		harvester.Resolver = resolver;
		resolver.Downloader = downloader;
		
		// Start through the Resolver instead of calling the Downloader
		// directly: the Resolver records every address it forwards, so the
		// start page is marked as processed and a later link back to it is
		// not downloaded a second time. The Resolver prints its
		// "New Link: " line for the start page as well.
		resolver.Process(uri, 0);
	}
}

// Downloads a page

agent Downloader 
{
	// Fetches the page at uri as a string, then hands it to the Harvester
	// (with its depth, for link extraction) and to the Processor.
	//   uri:   address passed to System.Net.WebClient.DownloadString
	//   depth: link depth of this page, passed through to the Harvester
	sub Process(uri,depth)
	{
		client = new System.Net.WebClient();
		content = client.DownloadString(uri);
		
		// WebClient is disposable; release it once the content has been read.
		// NOTE(review): if DownloadString fails, this line is not reached;
		// wrap the download in the interpreter's exception handling if it
		// offers one.
		client.Dispose();
		
		PrintLine("Downloaded: " + uri);
		this.Harvester.Process(uri,depth,content);
		this.Processor.Process(uri, content);
	}
}

// Process the content retrieved

agent Processor
{
	// Constructor: keeps the callback supplied by the caller.
	//   fn: function later invoked with (uri, content)
	function Processor(fn)
	{
		this.fn = fn; // callback invoked by Process
	}
	
	// Forwards one page to the stored callback.
	//   uri:     address of the page
	//   content: page text as received from the caller
	sub Process(uri, content)
	{
		// Extension point: add any extra per-page logic here
		this.fn(uri, content);
	}
}

// Get links from page

agent Harvester
{
	// Scans content for double-quoted href attribute values and sends each
	// distinct value that starts with "http" to the Resolver, one level
	// deeper than the page it was found on.
	//   uri:     address of the page being scanned (not used here)
	//   depth:   link depth of that page
	//   content: page text to scan
	sub Process(uri,depth,content)
	{
		hits = System.Text.RegularExpressions.Regex.Matches(content, "href=\\s*\"([^&\"]*)\"");
		seen = new List();
		nextDepth = depth+1;
		
		foreach (hit in hits) {
			link = hit.Groups[1].Value;
			
			// Handle each distinct href value once, in order of first appearance
			if (!seen.Contains(link)) {
				seen.Add(link);
				
				if (link.StartsWith("http"))
					this.Resolver.Process(new System.Uri(link), nextDepth);
			}
		}
	}
}

// Filter invalid or already processed links

agent Resolver
{
	// Addresses already accepted and forwarded to the Downloader
	var processed = new List();
	
	// Constructor.
	//   uri:      its Host is the only host that will be accepted
	//   maxdepth: largest link depth that will be accepted
	function Resolver(uri,maxdepth)
	{
		this.maxdepth = maxdepth;
		this.host = uri.Host;
	}
	
	// Forwards uri to the Downloader unless it is too deep, on another host,
	// not http/https, or already in the processed list.
	sub Process(uri,depth)
	{
		// Too deep
		if (depth > this.maxdepth)
			return;
		
		// Different host than the start address
		if (uri.Host != this.host)
			return;
		
		// Only http and https are followed
		scheme = uri.Scheme;
		
		if (scheme != System.Uri.UriSchemeHttp && scheme != System.Uri.UriSchemeHttps)
			return;
		
		// First time this address is seen: remember it, report it, fetch it
		if (!processed.Contains(uri)) {
			processed.Add(uri);
			PrintLine("New Link: " + uri);
			this.Downloader.Process(uri,depth);
		}
	}
}

// Example

// Crawl the WordPress blog; the callback prints each page address with a source tag
WebCrawler.Process(
	"http://ajlopez.wordpress.com",
	function(uri,content) {
		PrintLine("From ajlopez.wordpress "+uri);
	}
);

// Crawl the Zoomblog site the same way, with its own tag
WebCrawler.Process(
	"http://ajlopez.zoomblog.com",
	function(uri,content) {
		PrintLine("From ajlopez.zoomblog "+uri);
	}
);