extrage-profilele-de-la-pmb.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. <?php
  2. /**
  3. * File name: extrage-profilele-de-la-pmb.php
  4. *
  5. *
  6. * (C) Copyright 2013 Friedrich-Ebert-Stiftung (http://fes.ro)
  7. * Author: Tiberiu C. Turbureanu (tct@ceata.org)
  8. *
  9. * This file is part of the project funded by FES
  10. *
  11. * This is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU Affero General Public License as published by
  13. * the Free Software Foundation; either version 3 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU Affero General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU Affero General Public License
  22. * along with this program; if not, write to the Free Software
  23. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  24. */
  25. require_once 'utile.php';
  26. $names = array();
  27. // Load the list of persons
  28. $xml = new DOMDocument();
  29. $xml->load('../20131013-consilieri.xml');
  30. $xpath = new DOMXpath($xml);
  31. $persons = $xpath->query("/xml/person");
  32. foreach($persons as $person)
  33. {
  34. $dataset = array();
  35. $dataset[] = $xpath->query("comname", $person)->item(0)->nodeValue;
  36. $dataset[] = $xpath->query("surname", $person)->item(0)->nodeValue;
  37. $names[] = $dataset;
  38. }
  39. $profiles = array();
  40. $html = file_get_contents('http://www.pmb.ro/institutii/cgmb/componenta/consilieri/consilieri.php');
  41. $doc = new DOMDocument();
  42. $doc->loadHTML($html);
  43. $xpath = new DOMXpath($doc);
  44. $elements = $xpath->query("//table[@id='tabel']/tbody/tr[@class='table_row0']");
  45. foreach ($elements as $e)
  46. {
  47. $dataset = array();
  48. $iname = "";
  49. // Get row from person table
  50. $row = $xpath->query("td", $e);
  51. $name = $row->item(1)->nodeValue;
  52. $dataset['name'] = substr($name, 2);
  53. $surname = fărăDiacritice(lcfirst(strtok($dataset['name'], "– ")));
  54. if ($surname == 'alexandre') $surname = 'fontoura';
  55. if ($surname == 'boaja')
  56. {
  57. $dataset['name'] = utf8_encode("Boajă Minică");
  58. }
  59. if ($surname == 'popescu')
  60. {
  61. $comname = "";
  62. while ($r = strtok("– "))
  63. {
  64. $comname = lcfirst($r);
  65. }
  66. $iname = $comname.'-'.$surname;
  67. }
  68. else
  69. {
  70. foreach ($names as $name)
  71. if ($name[1] == $surname)
  72. $iname = $name[0].'-'.$name[1];
  73. }
  74. $dataset['name'] = cuLiniuță(trim(cuDiacriticeCorecte($dataset['name'])));
  75. $birth = $row->item(2)->nodeValue;
  76. $dataset['date'] = substr(strtok($birth, ", "), 2);
  77. $dataset['place'] = trim(cuDiacriticeCorecte(substr($birth, 14)));
  78. if ($iname == 'violeta-popescu')
  79. {
  80. $dataset['place'] = "Târgu Jiu, Jud. Gorj";
  81. }
  82. else if ($surname == 'pieptea')
  83. {
  84. $dataset['place'] = "Com. Văleni, Jud. Olt";
  85. }
  86. $occup = $row->item(3)->nodeValue;
  87. $dataset['occup'] = trim(cuDiacriticeCorecte(substr($occup, 2)));
  88. if ($surname == 'nicolescu')
  89. {
  90. $dataset['occup'] = "Cadru universitar";
  91. }
  92. else if ($surname == 'pieptea')
  93. {
  94. $dataset['occup'] = "Consilier C.G.M.B.";
  95. }
  96. $prof = $row->item(4)->nodeValue;
  97. $dataset['prof'] = trim(cuDiacriticeCorecte(substr($prof, 2)));
  98. $affil = $row->item(5)->nodeValue;
  99. $dataset['affil'] = fărăPunctuație(trim(cuDiacriticeCorecte(substr($affil, 2))));
  100. $profiles[$iname] = $dataset;
  101. }
  102. $declarations = array();
  103. $html = file_get_contents('http://www.pmb.ro/institutii/declaratii_avere/d_avere_12_16_cgmb.php');
  104. $doc = new DOMDocument();
  105. $doc->loadHTML($html);
  106. $xpath = new DOMXpath($doc);
  107. $elements = $xpath->query("//table[@class='tabel']/tbody/tr[not(@class)]");
  108. foreach ($elements as $e)
  109. {
  110. $dataset = array();
  111. $iname = "";
  112. // Get row from person table
  113. $row = $xpath->query("td", $e);
  114. $res = $xpath->query("p", $row->item(0));
  115. if ($res->length)
  116. $name = $res->item(0)->nodeValue;
  117. else
  118. $name = $row->item(0)->nodeValue;
  119. $surname = fărăDiacritice(lcfirst(strtok($name, "– ")));
  120. if ($surname == 'alexandre') $surname = 'fontoura';
  121. if ($surname == 'popescu')
  122. {
  123. $comname = "";
  124. while ($r = strtok("– "))
  125. {
  126. $comname = lcfirst($r);
  127. }
  128. $iname = $comname.'-'.$surname;
  129. }
  130. else
  131. {
  132. foreach ($names as $name)
  133. if ($name[1] == $surname)
  134. $iname = $name[0].'-'.$name[1];
  135. if ($iname == "")
  136. continue;
  137. }
  138. // Start wealth
  139. $wstart = "";
  140. $res = $xpath->query("a", $row->item(1));
  141. if ($res->length)
  142. $wstart = $res->item(0)->getAttribute('href');
  143. $dataset['wstart'] = $wstart;
  144. // Start interests
  145. $istart = "";
  146. $res = $xpath->query("a", $row->item(2));
  147. if ($res->length)
  148. $istart = $res->item(0)->getAttribute('href');
  149. $dataset['istart'] = $istart;
  150. // 2012 wealth
  151. $w2012 = "";
  152. $res = $xpath->query("a", $row->item(3));
  153. if ($res->length)
  154. $w2012 = $res->item(0)->getAttribute('href');
  155. $dataset['w2012'] = $w2012;
  156. // 2012 interests
  157. $i2012 = "";
  158. $res = $xpath->query("a", $row->item(4));
  159. if ($res->length)
  160. $i2012 = $res->item(0)->getAttribute('href');
  161. $dataset['i2012'] = $i2012;
  162. $declarations[$iname] = $dataset;
  163. }
  164. foreach ($names as $n)
  165. {
  166. $in = $n[0].'-'.$n[1];
  167. $xml = '<?xml version="1.0" encoding="UTF-8"?>'.PHP_EOL;
  168. $xml .= '<xml>'.PHP_EOL;
  169. $xml .= cuEtichetă("comname", extragePrenumele($profiles[$in]['name']));
  170. $xml .= cuEtichetă("surname", extrageNumele($profiles[$in]['name']));
  171. $xml .= cuEtichetă("fullname", inverseazăNumele($profiles[$in]['name']));
  172. $xml .= cuEtichetă("birthdate", $profiles[$in]['date']);
  173. $xml .= cuEtichetă("birthplace", $profiles[$in]['place']);
  174. $xml .= cuEtichetă("occupation", $profiles[$in]['occup']);
  175. $xml .= cuEtichetă("profession", $profiles[$in]['prof']);
  176. $xml .= cuEtichetă("affiliation", $profiles[$in]['affil']);
  177. $xml .= '<declarations>'.PHP_EOL;
  178. $xml .= "<declaration type='wealth' year='2012' file='".$declarations[$in]['wstart']."' />".PHP_EOL;
  179. $xml .= "<declaration type='wealth' year='2013' file='".$declarations[$in]['w2012']."' />".PHP_EOL;
  180. $xml .= "<declaration type='interest' year='2012' file='".$declarations[$in]['istart']."' />".PHP_EOL;
  181. $xml .= "<declaration type='interest' year='2013' file='".$declarations[$in]['i2012']."' />".PHP_EOL;
  182. $xml .= '</declarations>'.PHP_EOL;
  183. $xml .= '</xml>'.PHP_EOL;
  184. $file = fopen('../profile/00-pmb/'.$n[0].'-'.$n[1].'.xml', "w");
  185. fwrite($file, $xml);
  186. fclose($file);
  187. }
  188. ?>