#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Description: rutracker parser
# Author: yura_nn
# License: GNU/GPL v3
# Language: python
# Timestamp: 2017-10-04 19:40:17 +0000
#

import hashlib
import math
import random
import time

import mysql.connector
from bs4 import BeautifulSoup


  16. # ????? ????? ??????? ????????? ??????????? ? ???? ??????.
  17. host = "localhost"
  18. database = "rutracker"
  19. user = "user"
  20. password = "password"
  21. phpbb_prefix = "rutrk_"
  22. # ???? ? ????? xml.
  23. backup_xml = "/path/to/file.xml"
  24.  
  25. # ????????? ????????????? ????. ??????? topics, ???? topic_id.
  26. topic_id = 2
  27. # ????????? ????????????? ?????. ??????? posts, ???? post_id.
  28. post_id = 2
  29. # ????????????? ??????, ? ??????? ????? ??????????? ???? ?????????. ??????????
  30. # ????? ? ??????? forums, ???? forum_id.
  31. forum_id = 2
  32. # ????????????? ???????????? ?????? phpbb3. ??????? users, ???? user_id.
  33. user_id = 2
  34. # ??? ???????????? ?????? phpbb3.
  35. topic_user = "user"
  36. # ??????? ?????? ? ????????? ?????????.
  37. count_post = 1
  38.  
  39.  
  40. def parse_torrent(line_xml):
  41.     """ ??????? ?????? ???????????? ??????. ????????? ?????? ?????????????
  42.    ????????. ? ????? ????????? ???????, ?????????? ??????????? ???????? ?
  43.    ?????????? ???. """
  44.     soup = BeautifulSoup(line_xml, "xml")
  45.     torrent_all = soup.findAll("torrent")
  46.     torrent_id = torrent_all[0]["id"]
  47.     registred_at = torrent_all[0]["registred_at"]
  48.     torrent_size = torrent_all[0]["size"]
  49.     title = soup.title.string
  50.     title = str(title)
  51.     torrent_hash = torrent_all[1]["hash"]
  52.     tracker_id = torrent_all[1]["tracker_id"]
  53.     forum_id_old = soup.forum["id"]
  54.     forum_name = soup.forum.string
  55.     # ???????? ?????? ??????.
  56.     magnet = create_magnet(torrent_hash)
  57.     magnet = "[br]" + "?????? ??????: " + '\n' + "[code]" + magnet + \
  58.              "[/code]" + '\n'
  59.     # ???????? ????? ??? ????????? ??????? ?????.
  60.     torrent_link = create_torrent_link(torrent_hash)
  61.     torrent_link = "[url=http://itorrents.org/torrent/" + \
  62.                           torrent_link + "]??????? ????[/url]" + '\n'
  63.     hash_string = "[br]" + "???????? ??? ???????:" + '\n' + "[code]" + \
  64.                   torrent_hash + "[/code]" + '\n'
  65.     post_text = soup.content.string
  66.     # ?????????? ?????? ??????, ??????? ?????, ? ???????? ???? ??????? ????? ?
  67.     # ????.
  68.     post_text = post_text + '\n' + magnet
  69.     post_text = post_text + torrent_link
  70.     post_text = post_text + hash_string
  71.     # ? ??????? ???? ?????? ????? ??????? ???-????? ?????????.
  72.     post_checksum = post_hash(post_text)
  73.     # ????? ??????????? ???????? ???????? ???????.
  74.     time_post = math.floor(time.time())
  75.     post_table_string = {
  76.         "torrent_id": torrent_id,
  77.         "title": title,
  78.         "post_text": post_text,
  79.         "post_checksum": post_checksum,
  80.         "time_post": time_post,
  81.         "torrent_size": torrent_size,
  82.         "tracker_id": tracker_id,
  83.         "registred_at": registred_at,
  84.         "forum_id_old": forum_id_old,
  85.         "forum_name": forum_name
  86.     }
  87.     return post_table_string
  88.  
  89.  
  90. def add_post_to_base(post_table_string):
  91.     """ ??????? ??????? ?????????? ? ???????? ? ???? ??????. """
  92.     global topic_id
  93.     global post_id
  94.     global count_post
  95.     cnx = mysql.connector.connect(host=host,
  96.                                   database=database,
  97.                                   user=user,
  98.                                   password=password)
  99.     cursor = cnx.cursor()
  100.     # ???????? ????? ?????? ? ??????? ???.
  101.     part_query = "INSERT INTO " + phpbb_prefix + "topics "
  102.     query = part_query + "(topic_id,forum_id,icon_id,topic_attachment," + \
  103.             "topic_reported,topic_title,topic_poster,topic_time," + \
  104.             "topic_time_limit,topic_views,topic_status,topic_type," + \
  105.             "topic_first_post_id,topic_first_poster_name," + \
  106.             "topic_first_poster_colour,topic_last_post_id," + \
  107.             "topic_last_poster_id,topic_last_poster_name," + \
  108.             "topic_last_poster_colour,topic_last_post_subject," + \
  109.             "topic_last_post_time,topic_last_view_time,topic_moved_id," + \
  110.             "topic_bumped,topic_bumper,poll_title,poll_start,poll_length," + \
  111.             "poll_max_options,poll_last_vote,poll_vote_change," + \
  112.             "topic_visibility,topic_delete_time,topic_delete_reason," + \
  113.             "topic_delete_user,topic_posts_approved," + \
  114.             "topic_posts_unapproved,topic_posts_softdeleted) " + \
  115.             "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," + \
  116.             "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
  117.     data = (topic_id, forum_id, 0, 0, 0, post_table_string["title"],
  118.             user_id, post_table_string["time_post"], 0, 1, 0, 0, topic_id,
  119.             topic_user, "AA0000", topic_id, user_id, topic_user, "AA0000",
  120.             post_table_string["title"], post_table_string["time_post"],
  121.             post_table_string["time_post"], 0, 0, 0, '', 0, 0, 1, 0, 0, 1, 0,
  122.             '', 0, 1, 0, 0)
  123.     cursor.execute(query, data)
  124.     # ???????? ????? ?????? ? ??????? ??????.
  125.     part_query = "INSERT INTO " + phpbb_prefix + "posts "
  126.     query = part_query + "(post_id,topic_id,forum_id,poster_id,icon_id," + \
  127.             "poster_ip,post_time,post_reported,enable_bbcode," + \
  128.             "enable_smilies,enable_magic_url,enable_sig,post_username," + \
  129.             "post_subject,post_text,post_checksum,post_attachment, " + \
  130.             "bbcode_bitfield,bbcode_uid,post_postcount,post_edit_time," + \
  131.             "post_edit_reason,post_edit_user,post_edit_count," + \
  132.             "post_edit_locked,post_visibility,post_delete_time," + \
  133.             "post_delete_reason, post_delete_user) " + \
  134.             "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," + \
  135.             "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
  136.     data = (post_id, topic_id, forum_id, user_id, 0, "127.0.0.1",
  137.             post_table_string["time_post"], 0, 1, 1, 1, 1, topic_user,
  138.             post_table_string["title"], post_table_string["post_text"],
  139.             post_table_string["post_checksum"], 0, '', '', 1, 0, '', 0, 0, 0,
  140.             1, 0, '', 0)
  141.     cursor.execute(query, data)
  142.     # ???????? ????? ?????? ? ??????? topics_posted. ????? ??? ?????? ?????
  143.     # ????? ?????? ???? ????????? ??????.
  144.     part_query = "INSERT INTO " + phpbb_prefix + "topics_posted "
  145.     query = part_query + "(user_id,topic_id,topic_posted) VALUES (%s,%s,%s)"
  146.     data = (user_id, topic_id, 1)
  147.     cursor.execute(query, data)
  148.     # ????????? ?????? ? ??????? users.
  149.     part_query = "UPDATE " + phpbb_prefix + "users "
  150.     # ????? ?????????? ?????? ????????????.
  151.     query = part_query + "SET user_lastvisit = %s WHERE user_id = %s"
  152.     data = (post_table_string["time_post"], user_id)
  153.     cursor.execute(query, data)
  154.     # ????? ?????????? ????? ????????????.
  155.     query = part_query + "SET user_lastpost_time = %s WHERE user_id = %s"
  156.     data = (post_table_string["time_post"], user_id)
  157.     cursor.execute(query, data)
  158.     # ?????????? ?????? ????????????.
  159.     query = part_query + "SET user_posts = %s WHERE user_id = %s"
  160.     data = (count_post, user_id)
  161.     cursor.execute(query, data)
  162.     # ????????? ?????? ? ??????? forums.
  163.     part_query = "UPDATE " + phpbb_prefix + "forums "
  164.     # ID ?????????? ?????.
  165.     query = part_query + "SET forum_last_post_id = %s WHERE forum_id = %s"
  166.     data = (post_id, forum_id)
  167.     cursor.execute(query, data)
  168.     # ???????? ????????? ????.
  169.     query = part_query + "SET forum_last_post_subject = %s WHERE forum_id = %s"
  170.     data = (post_table_string["title"], forum_id)
  171.     cursor.execute(query, data)
  172.     # ID ?????????? ????????????, ??????????? ????.
  173.     query = part_query + "SET forum_last_poster_id = %s WHERE forum_id = %s"
  174.     data = (user_id, forum_id)
  175.     cursor.execute(query, data)
  176.     # ??? ?????????? ????????????, ??????????? ????.
  177.     query = part_query + "SET forum_last_poster_name = %s WHERE forum_id = %s"
  178.     data = (topic_user, forum_id)
  179.     cursor.execute(query, data)
  180.     # ????? ?????????? ?????.
  181.     query = part_query + "SET forum_last_post_time = %s WHERE forum_id = %s"
  182.     data = (post_table_string["time_post"], forum_id)
  183.     cursor.execute(query, data)
  184.     # ?????????? ??????????? ??????.
  185.     query = part_query + "SET forum_posts_approved = %s WHERE forum_id = %s"
  186.     data = (count_post, forum_id)
  187.     cursor.execute(query, data)
  188.     # ?????????? ??????????? ???. ??? ??? ?????????? ??? ? ?????? ?????? ?????
  189.     # ?????????? ??????, ?? ? ???????? ???????? ??????????? ??????? ??????.
  190.     query = part_query + "SET forum_topics_approved = %s WHERE forum_id = %s"
  191.     data = (count_post, forum_id)
  192.     cursor.execute(query, data)
  193.     # ????????? ?????? ? ??????? forums_track. ??? ???????? ?????????? ? ???,
  194.     # ??? ???????????? ??????? ????????? ? ???????????? ?????, ???
  195.     # ????????????? ??????. ? ??????? ???? ??????, ??????? ??????? ?? ?????.
  196.     part_query = "UPDATE " + phpbb_prefix + "forums_track "
  197.     query = part_query + "SET user_id = %s"
  198.     data = (user_id,)
  199.     cursor.execute(query, data)
  200.     query = part_query + "SET forum_id = %s"
  201.     data = (forum_id,)
  202.     cursor.execute(query, data)
  203.     query = part_query + "SET mark_time = %s"
  204.     data = (post_table_string["time_post"],)
  205.     cursor.execute(query, data)
  206.     cnx.commit()
  207.     cursor.close()
  208.     cnx.close()
  209.     topic_id += 1
  210.     post_id += 1
  211.     count_post += 1
  212.  
  213.  
  214. def create_magnet(torrent_hash):
  215.     """ ??????? ??????? ?????? ???? ??? ??????? ?????????. """
  216.     part_begin = "magnet:?xt=urn:btih:"
  217.     # ????? ???? ?????: ????????? ????? ????? ?? ?????-?? ???? ????? ??
  218.     # *.t-ru.org. ?????? ????? ?? ?????. ??????? ????? ? ?????? ????
  219.     # ??????????? ????????.
  220.     part_end = [
  221.         "&tr=http%3A%2F%2Fbt.t-ru.org%2Fann%3Fmagnet",
  222.         "&tr=http%3A%2F%2Fbt2.t-ru.org%2Fann%3Fmagnet",
  223.         "&tr=http%3A%2F%2Fbt3.t-ru.org%2Fann%3Fmagnet",
  224.         "&tr=http%3A%2F%2Fbt4.t-ru.org%2Fann%3Fmagnet"
  225.     ]
  226.     number_element = random.randrange(0, 4, 1)
  227.     magnet_link = part_begin + torrent_hash + part_end[number_element]
  228.     return magnet_link
  229.  
  230.  
  231. def create_torrent_link(torrent_hash):
  232.     """ ??????? ??????? ?????? ??? ????????? ??????? ?????. """
  233.     addr = "http://itorrents.org/torrent/"
  234.     torrent_link = addr + torrent_hash + ".torrent"
  235.     return torrent_link
  236.  
  237.  
  238. def post_hash(string):
  239.     """ ??????? ???????????? ? ?????????? ???-????? md5 ?????????? ??????. """
  240.     string = string.encode('utf-8')
  241.     hash_string = hashlib.md5(string).hexdigest()
  242.     return hash_string
  243.  
  244.  
  245. def main():
  246.     # ?????? ?????? ? ????????? ????????????? ? ???? ????????? xml ????,
  247.     # ??????? ???????? ?????? ??? ? ??????? BeautifulSoup.
  248.     first_xml_string = '<?xml version="1.0" encoding="UTF-8"?>' + '\n'
  249.     other_xml_string = "<torrents>" + '\n'
  250.     last_xml_string = "</torrents>" + '\n'
  251.     line_xml = ''
  252.     fd = open(backup_xml, "r")
  253.     for line in fd:
  254.         if line.find("<torrent id") != -1:
  255.             line_xml = first_xml_string + other_xml_string
  256.         line_xml = line_xml + line
  257.         if line.find("</torrent") != -1:
  258.             line_xml = line_xml + last_xml_string
  259.             post_table_string = parse_torrent(line_xml)
  260.             add_post_to_base(post_table_string)
  261.     fd.close()
  262.  
  263.  
  264. if __name__ == "__main__":
  265.     main()

# (Paste-site footer "View raw paste Reply" was here; it is not part of the script.)