Reputation: 3
using System;
using System.ComponentModel;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Collections.Generic;
using System.Collections.Concurrent;
using System.Net;
namespace Seranking_Scraper
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
btnStop.Enabled = false;
}
CancellationTokenSource cts = null;
List<string> proxyList = new List<string>();
int _proxyIndex = 0;
int retries = 0;
int linesCount = 1;
int totalLinesCount;
List<Task> tasks = null;
string regex = "XXXXXX";
private static HttpClient client = null;
private async void BtnStart_Click(object sender, EventArgs e)
{
dataGridView1.Rows.Clear();
cts = new CancellationTokenSource();
btnStart.Enabled = false;
btnStop.Enabled = true;
btnExport.Enabled = false;
btnOpen.Enabled = false;
btnClear.Enabled = false;
totalLinesCount = listBox_domains.Items.Count;
List<string> urls = new List<string>();
for (int i = 0; i < listBox_domains.Items.Count; i++)
{
urls.Add(listBox_domains.Items[i].ToString());
}
if (textBox_Proxies.Text != null)
{
for (int i = 0; i < textBox_Proxies.Lines.Length; i++)
{
proxyList.Add(textBox_Proxies.Lines[i]);
}
}
var maxThreads = (int)numericUpDown1.Value;
var q = new ConcurrentQueue<string>(urls);
tasks = new List<Task>();
for (int n = 0; n < maxThreads; n++)
{
tasks.Add(Task.Run(async () =>
{
while (q.TryDequeue(out string url))
{
await SendHttpRequestAsync(url, cts.Token);
Thread.Sleep(1);
if (cts.IsCancellationRequested)
{
break;
}
foreach (Task eTask in tasks)
{
if (eTask.IsCompleted)
eTask.Dispose();
}
}
}, cts.Token));
}
await Task.WhenAll(tasks).ContinueWith((FinalWork) =>
{
Invoke(new Action(() =>
{
btnStart.Enabled = true;
btnExport.Enabled = true;
btnOpen.Enabled = true;
btnClear.Enabled = true;
timer1.Enabled = false;
timer1.Stop();
progressBar1.Style = ProgressBarStyle.Blocks;
progressBar1.Invoke((Action)(() => progressBar1.Value = 100));
if(!cts.IsCancellationRequested)
MessageBox.Show(new Form { TopMost = true }, "Completed!", "Status", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
}));
}, TaskContinuationOptions.OnlyOnRanToCompletion);
//var options = new ParallelOptions()
//{
// MaxDegreeOfParallelism = (int)numericUpDown1.Value
//};
//Parallel.For(0, listBox_domains.Items.Count, async j =>
//{
// await SendHttpRequestAsync(listBox_domains.Items[j].ToString());
// Thread.Sleep(10);
//});
}
private string GetProxy()
{
if (proxyList.Count <=0) return null;
if (_proxyIndex >= proxyList.Count - 1) _proxyIndex = 0;
var proxy = proxyList[_proxyIndex];
_proxyIndex++;
return proxy;
}
private async Task ToCsV(DataGridView dGV, string filename)
{
await Task.Yield();
string stOutput = "";
// Export titles:
string sHeaders = "";
for (int j = 0; j < dGV.Columns.Count; j++)
sHeaders = sHeaders.ToString() + Convert.ToString(dGV.Columns[j].HeaderText) + "\t";
stOutput += sHeaders + "\r\n";
// Export data.
for (int i = 0; i < dGV.RowCount - 1; i++)
{
string stLine = "";
for (int j = 0; j < dGV.Rows[i].Cells.Count; j++)
stLine = stLine.ToString() + Convert.ToString(dGV.Rows[i].Cells[j].Value) + "\t";
stOutput += stLine + "\r\n";
//progressBar1.Style = ProgressBarStyle.Blocks;
//progressBar1.Value = (i / 100) * 100;
}
Encoding utf16 = Encoding.GetEncoding(1254);
byte[] output = utf16.GetBytes(stOutput);
FileStream fs = new FileStream(filename, FileMode.Create);
BinaryWriter bw = new BinaryWriter(fs);
bw.Write(output, 0, output.Length); //write the encoded file
bw.Flush();
bw.Close();
fs.Close();
}
private async Task SendHttpRequestAsync(string url, CancellationToken ct)
{
var httpClientHandler = new HttpClientHandler
{
Proxy = new WebProxy(GetProxy(), false),
UseProxy = true
};
//httpClientHandler.MaxConnectionsPerServer = 1;
httpClientHandler.AllowAutoRedirect = true;
httpClientHandler.MaxAutomaticRedirections = 3;
try
{
using (client = new HttpClient(httpClientHandler))
{
client.Timeout = TimeSpan.FromMilliseconds(1000 * (int)numericUpDown_timeout.Value); //adjust based on your network
client.DefaultRequestHeaders.ConnectionClose = true;
ServicePointManager.DefaultConnectionLimit = 100;
//var byteArray = Encoding.ASCII.GetBytes("username:password1234");
//client.DefaultRequestHeaders.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Basic", Convert.ToBase64String(byteArray));
try
{
using (HttpResponseMessage response = await client.GetAsync("xxxx))
{
if (response.IsSuccessStatusCode)
{
using (HttpContent content = response.Content)
{
//response.Dispose();
string result = await content.ReadAsStringAsync();
Regex match = new Regex(regex, RegexOptions.Singleline);
MatchCollection collection = Regex.Matches(result, regex);
try
{
if (collection.Count > 0)
{
await AddDataToDgv(url, collection[0].ToString(), collection[1].ToString(), collection[2].ToString());
}
else if (result.Contains("No data for your search query"))
{
await AddDataToDgv(url, "nodata", "nodata", "nodata");
}
}
catch (Exception ex)
{
//MessageBox.Show(ex.ToString());
await AddDataToDgv(url, "errorCount", "errorCount", "errorCount");
}
}
}
else
{
await RetriesProxyFail(url, ct);
}
}
}catch(Exception ex)
{
await RetriesProxyFail(url, ct, ex);
client.Dispose();
}
}
}
catch (Exception ex)
{
MessageBox.Show(ex.ToString());
}
}
public async Task RetriesProxyFail(string url, CancellationToken ct, Exception ex = null)
{
client.DefaultRequestHeaders.ConnectionClose = true;
if (!cts.IsCancellationRequested)
{
retries++;
if (retries > (int)numericUpDown_Retries.Value)
{
retries = 0;
Invoke(new Action(async () =>
{
lbl_RemainingLines.Text = "Remaining Urls: " + (totalLinesCount - (dataGridView1.Rows.Count)).ToString();
await AddDataToDgv(url, "timeout", "timeout", "timeout");
}));
}
else
{
await SendHttpRequestAsync(url, ct);
}
}
}
public async Task AddDataToDgv(string url, string tcost, string tTraffic, string tValue)
{
try
{
await Task.Yield();
Invoke(new Action(() =>
{
dataGridView1.Rows.Add(url, tcost, tTraffic, tValue);
lbl_RemainingLines.Text = "Remaining Urls: " + (totalLinesCount - (dataGridView1.Rows.Count)).ToString();
if (Application.RenderWithVisualStyles)
progressBar1.Style = ProgressBarStyle.Marquee;
else
{
progressBar1.Style = ProgressBarStyle.Continuous;
progressBar1.Maximum = 100;
progressBar1.Value = 0;
timer1.Enabled = true;
}
}));
}
catch (Exception ex)
{
Invoke(new Action(async () =>
{
lbl_RemainingLines.Text = "Remaining Urls: " + (totalLinesCount - (dataGridView1.Rows.Count)).ToString();
await AddDataToDgv(url, "error", "error", "error");
}));
}
}
private void BtnOpen_Click(object sender, EventArgs e)
{
linesCount = 1;
try
{
openFileDialog1.ShowDialog();
openFileDialog1.Title = "Please select text file that contains root domains.";
openFileDialog1.DefaultExt = "txt";
openFileDialog1.Filter = "txt files (*.txt)|*.txt|All files (*.*)|*.*";
openFileDialog1.FilterIndex = 2;
openFileDialog1.CheckFileExists = true;
openFileDialog1.CheckPathExists = true;
this.openFileDialog1.Multiselect = true;
myWorker_ReadTxtFile = new BackgroundWorker();
myWorker_ReadTxtFile.DoWork += new DoWorkEventHandler(MyWorker_ReadTxtFile_DoWork);
myWorker_ReadTxtFile.RunWorkerCompleted += new RunWorkerCompletedEventHandler(MyWorker_ReadTxtFile_RunWorkerCompleted);
myWorker_ReadTxtFile.ProgressChanged += new ProgressChangedEventHandler(MyWorker_ReadTxtFile_ProgressChanged);
myWorker_ReadTxtFile.WorkerReportsProgress = true;
myWorker_ReadTxtFile.WorkerSupportsCancellation = true;
listBox_domains.Items.Clear();
foreach (String fileName_Domains in openFileDialog1.FileNames)
{
myWorker_ReadTxtFile.RunWorkerAsync(fileName_Domains);
}
}
catch (Exception ex)
{
}
}
private void OpenFileDialog1_FileOk(object sender, CancelEventArgs e)
{
}
private void MyWorker_ReadTxtFile_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
listBox_domains.Items.Add(e.UserState.ToString());
lbl_totallines.Text = "TLines: " + linesCount++.ToString();
}
private void MyWorker_ReadTxtFile_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
}
public void ReadLinesToListBox(string fileName_Domains)
{
using (StreamReader sr = File.OpenText(fileName_Domains))
{
string s = String.Empty;
while ((s = sr.ReadLine()) != null)
{
myWorker_ReadTxtFile.ReportProgress(0, s);
Thread.Sleep(1);
}
}
}
private void MyWorker_ReadTxtFile_DoWork(object sender, DoWorkEventArgs e)
{
BackgroundWorker sendingWorker = (BackgroundWorker)sender;//Capture the BackgroundWorker that fired the event
object fileName_Domains = (object)e.Argument;//Collect the array of objects the we received from the main thread
string s = fileName_Domains.ToString();//Get the string value
ReadLinesToListBox(s);
}
private void Label2_Click(object sender, EventArgs e)
{
}
private void BtnStop_Click(object sender, EventArgs e)
{
if (cts != null)
{
cts.Cancel();
cts.Dispose();
btnStart.Enabled = true;
btnStop.Enabled = false;
btnExport.Enabled = true;
btnOpen.Enabled = true;
btnClear.Enabled = true;
progressBar1.Style = ProgressBarStyle.Blocks;
progressBar1.Value = 100;
MessageBox.Show(new Form { TopMost = true }, "Cancelled!", "Status", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
}
}
private async void BtnExport_Click(object sender, EventArgs e)
{
SaveFileDialog sfd = new SaveFileDialog();
sfd.Filter = "Excel Documents (*.xls)|*.xls";
sfd.FileName = "Site Metrics";
if (sfd.ShowDialog() == DialogResult.OK)
{
await ToCsV(dataGridView1, sfd.FileName); // Here dataGridview1 is your grid view name
}
MessageBox.Show(new Form { TopMost = true }, "Exported!", "Status", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
}
private void BtnClear_Click(object sender, EventArgs e)
{
listBox_domains.Items.Clear();
dataGridView1.Rows.Clear();
}
private void Form1_Load(object sender, EventArgs e)
{
this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedSingle;
}
private void BtnPasteProxies_Click(object sender, EventArgs e)
{
textBox_Proxies.Text = Clipboard.GetText();
}
private void Timer1_Tick(object sender, EventArgs e)
{
progressBar1.Value += 5;
if (progressBar1.Value > 100)
progressBar1.Value = 0;
}
}
}
The above code is working fine but the problem is it slowly increasing the memory usage. When I start it use some around 40 mb slowly its increasing I tried memory tests, the increase in memory is slow but its increasing slowly.
Any one help me whats wrong in my code?
I am testing with some 4k urls, when it reaches 2k urls my memory usage is 60 MB. Still increasing slowly.
Upvotes: 0
Views: 142
Reputation: 9804
A slow memory increase is totally normal. .NET uses Garbage Collection Memory Management approach. Now while the collection runs, every other thread has to stop. This can cause human noticeable stalls. And is one big reason GC and realtime programming do not mix that well.
To avoid those stalls, the GC is lazy with running. It is aiming for only running once - at application closure. Delays will not be that noticeable then. And it might even be able to save work, as the memory will be handed back to the OS afterwards and not be reused. Short of running finalizers, there might not be much work to be done.
There are only a few things that can force it to run earlier:
GC.Collect();
It allows you to force a collection now. Note that this only adviseable for debugging and testing. Production code should not have thisUpvotes: 1