Queue.pm.bak 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. # ABSTRACT: Scrappy HTTP Request Flow-Control System
  2. # Dist::Zilla: +PodWeaver
  3. package Scrappy::Queue;
  4. BEGIN {
  5. $Scrappy::Queue::VERSION = '0.94112090';
  6. }
  7. # load OO System
  8. use Moose;
  9. # load other libraries
  10. use Array::Unique;
  11. use URI;
  # Package-level navigation state used by every method in this package: the
  # position of the iteration cursor and the list of queued URLs.
  our $_cursor = -1;    # -1 places the cursor just before the first element
  our @_queue  = ();

  # Array::Unique keeps the tied array free of duplicate entries.
  tie @_queue, 'Array::Unique';
  # Return the queued URLs. In list context this is every URL in queue order;
  # in scalar context it is the number of queued URLs.
  sub list {
      my @queued = @_queue;
      return @queued;
  }
  # Validate the given URLs and append the acceptable ones to the queue.
  #
  # Arguments: the invocant followed by zero or more URL strings.
  # Returns:   the invocant, so calls can be chained.
  #
  # A candidate is accepted only when it contains something that looks like a
  # host name: two or more word characters, a dot, then two or more word
  # characters. Undefined and non-matching candidates are skipped entirely, so
  # the queue never receives undef placeholders (an undef entry would end a
  # "while (my $url = $queue->next)" loop prematurely).
  sub add {
      my ($self, @candidates) = @_;

      my @accepted;
      for my $candidate (@candidates) {
          next unless defined $candidate && $candidate =~ /\w{2,}\.\w{2,}/;

          # Store the string form reported by the URI object when URI hands
          # back one of its URI::* classes; otherwise keep the text as given.
          my $uri = URI->new($candidate);
          push @accepted, ref($uri) =~ /\AURI::/ ? $uri->as_string : $candidate;
      }

      push @_queue, @accepted;
      return $self;
  }
  # Empty the queue and move the cursor back to its starting position (-1).
  # Returns the invocant so calls can be chained.
  sub clear {
      my ($self) = @_;

      $_cursor = -1;
      @_queue  = ();

      return $self;
  }
  # Rewind the cursor to its starting position (-1) while leaving the queued
  # URLs untouched. Returns the invocant so calls can be chained.
  sub reset {
      my ($self) = @_;

      $_cursor = -1;

      return $self;
  }
  # Return the URL at the current cursor position.
  #
  # Returns undef when the cursor does not point at a queued URL. That
  # includes the starting position (-1): a negative subscript would otherwise
  # count from the end of the array and hand back the last URL before any
  # iteration has begun.
  sub current {
      my $self = shift;
      return $_cursor < 0 ? undef : $_queue[$_cursor];
  }
  # Advance the cursor by one position and return the URL found there
  # (undef once the cursor has moved past the end of the queue).
  sub next {
      my ($self) = @_;

      $_cursor += 1;
      my $url = $_queue[$_cursor];

      return $url;
  }
  # Move the cursor back by one position and return the URL found there.
  #
  # The cursor never moves below its starting position (-1). Stepping back
  # from the first URL returns undef instead of letting a negative subscript
  # wrap around to the end of the queue.
  sub previous {
      my $self = shift;

      $_cursor-- if $_cursor > -1;

      return $_cursor < 0 ? undef : $_queue[$_cursor];
  }
  # Jump the cursor to the first position (index 0) and return the URL stored
  # there, or undef when the queue is empty.
  sub first {
      my ($self) = @_;

      $_cursor = 0;
      my $url = $_queue[0];

      return $url;
  }
  # Jump the cursor to the final position in the queue and return the URL
  # stored there. On an empty queue the cursor becomes -1 and undef is
  # returned.
  sub last {
      my ($self) = @_;

      my $tail = @_queue - 1;
      $_cursor = $tail;

      return $_queue[$tail];
  }
  # Move the cursor to the given array index and return the URL stored there.
  # A missing or false position is treated as index 0.
  sub index {
      my ($self, $position) = @_;

      $_cursor = $position || 0;

      return $_queue[$_cursor];
  }
  # Report the current cursor position (-1 until iteration has started).
  sub cursor {
      my $position = $_cursor;
      return $position;
  }
  78. 1;
  79. __END__
  80. =pod
  81. =head1 NAME
  82. Scrappy::Queue - Scrappy HTTP Request Flow-Control System
  83. =head1 VERSION
  84. version 0.94112090
  85. =head1 SYNOPSIS
  86. #!/usr/bin/perl
  87. use Scrappy::Queue;
  88. my $queue = Scrappy::Queue->new;
  89. $queue->add($url);
  90. while (my $url = $queue->next) {
  91. ... $queue->add(...);
  92. }
  93. =head1 DESCRIPTION
  94. Scrappy::Queue provides a system for saving URLs to a recordset/queue and iterating
  95. over them using the L<Scrappy> framework.
  96. =head1 METHODS
  97. =head2 list
  98. The list method returns the list of URLs in the queue. This is returned in list
  99. context.
  100. my $queue = Scrappy::Queue->new;
  101. ...
  102. my @list = $queue->list;
  103. =head2 add
  104. The add method adds new URLs to the queue. Duplicate URLs will be ignored.
  105. my $queue = Scrappy::Queue->new;
  106. $queue->add($url);
  107. =head2 clear
  108. The clear method completely empties the queue and resets the cursor (loop position).
  109. my $queue = Scrappy::Queue->new;
  110. $queue->add(...);
  111. $queue->add(...);
  112. $queue->add(...);
  113. $queue->clear;
  114. =head2 reset
  115. The reset method resets the cursor (loop position).
  116. my $queue = Scrappy::Queue->new;
  117. $queue->add(...);
  118. $queue->add(...);
  119. $queue->add(...);
  120. while (my $url = $queue->next) {
  121. $queue->reset if ...; # beware the infinite loop
  122. }
  123. $queue->reset;
  124. =head2 current
  125. The current method returns the URL in the current loop position.
  126. my $queue = Scrappy::Queue->new;
  127. $queue->add(...);
  128. $queue->add(...);
  129. $queue->add(...);
  130. while (my $url = $queue->next) {
  131. last if ...;
  132. }
  133. print 'great' if $url eq $queue->current;
  134. =head2 next
  135. The next method moves the cursor to the next loop position and returns the URL.
  136. my $queue = Scrappy::Queue->new;
  137. $queue->add(...);
  138. $queue->add(...);
  139. $queue->add(...);
  140. while (my $url = $queue->next) {
  141. ...
  142. }
  143. =head2 previous
  144. The previous method moves the cursor to the previous loop position and returns the URL.
  145. my $queue = Scrappy::Queue->new;
  146. $queue->add(...);
  147. $queue->add(...);
  148. $queue->add(...);
  149. while (my $url = $queue->next) {
  150. ...
  151. }
  152. print $queue->previous;
  153. =head2 first
  154. The first method moves the cursor to the first loop position and returns the URL.
  155. my $queue = Scrappy::Queue->new;
  156. $queue->add(...);
  157. $queue->add(...);
  158. $queue->add(...);
  159. print $queue->first;
  160. =head2 last
  161. The last method moves the cursor to the last loop position and returns the URL.
  162. my $queue = Scrappy::Queue->new;
  163. $queue->add(...);
  164. $queue->add(...);
  165. $queue->add(...);
  166. print $queue->last;
  167. =head2 index
  168. The index method moves the cursor to the specified loop position and returns the
  169. URL. The loop position is a standard array index position.
  170. my $queue = Scrappy::Queue->new;
  171. $queue->add(...);
  172. $queue->add(...);
  173. $queue->add(...);
  174. print $queue->index(1);
  175. =head2 cursor
  176. The cursor method returns the current loop position.
  177. my $queue = Scrappy::Queue->new;
  178. print $queue->cursor;
  179. =head1 AUTHOR
  180. Al Newkirk <awncorp@cpan.org>
  181. =head1 COPYRIGHT AND LICENSE
  182. This software is copyright (c) 2010 by awncorp.
  183. This is free software; you can redistribute it and/or modify it under
  184. the same terms as the Perl 5 programming language system itself.
  185. =cut