Slexy.org is shutting down and stopped accepting new Pastes on May 4th, 2021.
Existing Pastes will stop being available on or after May 10th, 2021.
Author: yura_nn Language: python
Description: rutracker xml parser Timestamp: 2017-10-11 22:03:25 +0000
View raw paste Reply
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Author: yura_nn
  5. # License: GNU/GPL v3
  6.  
  7.  
  8. from bs4 import BeautifulSoup
  9. import hashlib
  10. import random
  11. import mysql.connector
  12. import time
  13. import math
  14. import re
  15.  
  16.  
# Database connection settings passed to mysql.connector.connect().
host = "localhost"
database = "rutracker"
user = "user_mysql"
password = "password"
# Prefix prepended to every phpBB table name used in the SQL statements.
phpbb_prefix = "rutrk_"
# Path to the XML file read by main().
backup_xml = "/path/to/file.xml"
# Identifier given to the first imported topic (topics table, topic_id
# column); it is incremented after every inserted torrent.
topic_id = 1
# Identifier given to the first imported post (posts table, post_id column);
# it is incremented after every inserted torrent.
post_id = 1
# Running post counter; it is written to users.user_posts and to the
# forum's approved post/topic counters, then incremented.
count_post = 1
# Identifier of the forum that receives every imported topic (forums table,
# forum_id column).
forum_id = 2
# Identifier of the phpBB3 user recorded as the author (users table, user_id
# column).
user_id = 2
# Name of that phpBB3 user, stored as the poster name.
topic_user = "user_phpBB3"
  38.  
  39.  
  40. def parse_torrent(line_xml):
  41.     """ ??????? ?????? ???????????? ??????. ????????? ?????? ?????????????
  42.    ????????. ? ????? ????????? ???????, ?????????? ??????????? ???????? ?
  43.    ?????????? ???. """
  44.     soup = BeautifulSoup(line_xml, "xml")
  45.     torrent_all = soup.findAll("torrent")
  46.     torrent_id = torrent_all[0]["id"]
  47.     registred_at = torrent_all[0]["registred_at"]
  48.     torrent_size = torrent_all[0]["size"]
  49.     title = soup.title.string
  50.     title = str(title)
  51.     torrent_hash = torrent_all[1]["hash"]
  52.     tracker_id = torrent_all[1]["tracker_id"]
  53.     forum_id_old = soup.forum["id"]
  54.     forum_name = soup.forum.string
  55.     # ???????? ?????? ??????.
  56.     magnet_dht_only = create_magnet_dht(torrent_hash)
  57.     magnet_rutracker = create_magnet_rutracker(torrent_hash)
  58.     magnet_dht_only = "[br]" + "?????? ?????? (?????? DHT): " + '\n' + \
  59.                       "[code]" + magnet_dht_only + "[/code]" + '\n'
  60.     magnet_rutracker = "[br]" + "?????? ?????? (? bt*.t-ru.org): " + '\n' + \
  61.                        "[code]" + magnet_rutracker + "[/code]" + '\n'
  62.     # ?????? ???? ???????.
  63.     hash_string = "[br]" + "??? ???????:" + '\n' + "[code]" + \
  64.                   torrent_hash + "[/code]" + '\n'
  65.     data_torrent = "???? ???????? ???????: " + registred_at + '\n'
  66.     post_text = soup.content.string
  67.     # ?????????? ???? ???????? ???????, ?????? ??????, ??????? ?????,
  68.     # ? ???????? ???? ??????? ????? ? ????.
  69.     post_text = post_text + '\n' + magnet_rutracker
  70.     post_text = post_text + magnet_dht_only
  71.     post_text = post_text + hash_string
  72.     post_text = post_text + data_torrent
  73.     # ??????????????? ?????? ????? ? ?????? ????????? phpbb.
  74.     post_text = convert_post_text(post_text)
  75.     # ? ??????? ???? ?????? ????? ??????? ???-????? ?????????.
  76.     post_checksum = post_hash(post_text)
  77.     # ????????????? bbcode ??? ?????.
  78.     bbcode_uid = random_id()
  79.     # ????? ??????????? ???????? ???????? ???????.
  80.     time_post = math.floor(time.time())
  81.     post_table_string = {
  82.         "torrent_id": torrent_id,
  83.         "title": title,
  84.         "post_text": post_text,
  85.         "post_checksum": post_checksum,
  86.         "bbcode_uid": bbcode_uid,
  87.         "time_post": time_post,
  88.         "torrent_size": torrent_size,
  89.         "tracker_id": tracker_id,
  90.         "forum_id_old": forum_id_old,
  91.         "forum_name": forum_name
  92.     }
  93.     return post_table_string
  94.  
  95.  
  96. def add_post_to_base(post_table_string):
  97.     """ ??????? ??????? ?????????? ? ???????? ? ???? ??????. """
  98.     global topic_id
  99.     global post_id
  100.     global count_post
  101.     cnx = mysql.connector.connect(host=host,
  102.                                   database=database,
  103.                                   user=user,
  104.                                   password=password)
  105.     cursor = cnx.cursor()
  106.     # ???????? ????? ?????? ? ??????? ???.
  107.     part_query = "INSERT INTO " + phpbb_prefix + "topics "
  108.     query = part_query + "(topic_id,forum_id,icon_id,topic_attachment," + \
  109.             "topic_reported,topic_title,topic_poster,topic_time," + \
  110.             "topic_time_limit,topic_views,topic_status,topic_type," + \
  111.             "topic_first_post_id,topic_first_poster_name," + \
  112.             "topic_first_poster_colour,topic_last_post_id," + \
  113.             "topic_last_poster_id,topic_last_poster_name," + \
  114.             "topic_last_poster_colour,topic_last_post_subject," + \
  115.             "topic_last_post_time,topic_last_view_time,topic_moved_id," + \
  116.             "topic_bumped,topic_bumper,poll_title,poll_start,poll_length," + \
  117.             "poll_max_options,poll_last_vote,poll_vote_change," + \
  118.             "topic_visibility,topic_delete_time,topic_delete_reason," + \
  119.             "topic_delete_user,topic_posts_approved," + \
  120.             "topic_posts_unapproved,topic_posts_softdeleted) " + \
  121.             "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," + \
  122.             "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
  123.     data = (topic_id, forum_id, 0, 0, 0, post_table_string["title"],
  124.             user_id, post_table_string["time_post"], 0, 1, 0, 0, topic_id,
  125.             topic_user, "AA0000", topic_id, user_id, topic_user, "AA0000",
  126.             post_table_string["title"], post_table_string["time_post"],
  127.             post_table_string["time_post"], 0, 0, 0, '', 0, 0, 1, 0, 0, 1, 0,
  128.             '', 0, 1, 0, 0)
  129.     cursor.execute(query, data)
  130.     # ???????? ????? ?????? ? ??????? ??????.
  131.     part_query = "INSERT INTO " + phpbb_prefix + "posts "
  132.     query = part_query + "(post_id,topic_id,forum_id,poster_id,icon_id," + \
  133.             "poster_ip,post_time,post_reported,enable_bbcode," + \
  134.             "enable_smilies,enable_magic_url,enable_sig,post_username," + \
  135.             "post_subject,post_text,post_checksum,post_attachment, " + \
  136.             "bbcode_bitfield,bbcode_uid,post_postcount,post_edit_time," + \
  137.             "post_edit_reason,post_edit_user,post_edit_count," + \
  138.             "post_edit_locked,post_visibility,post_delete_time," + \
  139.             "post_delete_reason, post_delete_user) " + \
  140.             "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," + \
  141.             "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
  142.     data = (post_id, topic_id, forum_id, user_id, 0, "127.0.0.1",
  143.             post_table_string["time_post"], 0, 1, 1, 1, 1, topic_user,
  144.             post_table_string["title"], post_table_string["post_text"],
  145.             post_table_string["post_checksum"], 0, '',
  146.             post_table_string["bbcode_uid"], 1, 0, '', 0, 0, 0, 1, 0, '', 0)
  147.     cursor.execute(query, data)
  148.     # ???????? ????? ?????? ? ??????? topics_posted. ????? ??? ?????? ?????
  149.     # ????? ?????? ???? ????????? ??????.
  150.     part_query = "INSERT INTO " + phpbb_prefix + "topics_posted "
  151.     query = part_query + "(user_id,topic_id,topic_posted) VALUES (%s,%s,%s)"
  152.     data = (user_id, topic_id, 1)
  153.     cursor.execute(query, data)
  154.     # ????????? ?????? ? ??????? users.
  155.     part_query = "UPDATE " + phpbb_prefix + "users "
  156.     # ????? ?????????? ?????? ????????????.
  157.     query = part_query + "SET user_lastvisit = %s WHERE user_id = %s"
  158.     data = (post_table_string["time_post"], user_id)
  159.     cursor.execute(query, data)
  160.     # ????? ?????????? ????? ????????????.
  161.     query = part_query + "SET user_lastpost_time = %s WHERE user_id = %s"
  162.     data = (post_table_string["time_post"], user_id)
  163.     cursor.execute(query, data)
  164.     # ?????????? ?????? ????????????.
  165.     query = part_query + "SET user_posts = %s WHERE user_id = %s"
  166.     data = (count_post, user_id)
  167.     cursor.execute(query, data)
  168.     # ????????? ?????? ? ??????? forums.
  169.     part_query = "UPDATE " + phpbb_prefix + "forums "
  170.     # ID ?????????? ?????.
  171.     query = part_query + "SET forum_last_post_id = %s WHERE forum_id = %s"
  172.     data = (post_id, forum_id)
  173.     cursor.execute(query, data)
  174.     # ???????? ????????? ????.
  175.     query = part_query + "SET forum_last_post_subject = %s WHERE forum_id = %s"
  176.     data = (post_table_string["title"], forum_id)
  177.     cursor.execute(query, data)
  178.     # ID ?????????? ????????????, ??????????? ????.
  179.     query = part_query + "SET forum_last_poster_id = %s WHERE forum_id = %s"
  180.     data = (user_id, forum_id)
  181.     cursor.execute(query, data)
  182.     # ??? ?????????? ????????????, ??????????? ????.
  183.     query = part_query + "SET forum_last_poster_name = %s WHERE forum_id = %s"
  184.     data = (topic_user, forum_id)
  185.     cursor.execute(query, data)
  186.     # ????? ?????????? ?????.
  187.     query = part_query + "SET forum_last_post_time = %s WHERE forum_id = %s"
  188.     data = (post_table_string["time_post"], forum_id)
  189.     cursor.execute(query, data)
  190.     # ?????????? ??????????? ??????.
  191.     query = part_query + "SET forum_posts_approved = %s WHERE forum_id = %s"
  192.     data = (count_post, forum_id)
  193.     cursor.execute(query, data)
  194.     # ?????????? ??????????? ???. ??? ??? ?????????? ??? ? ?????? ?????? ?????
  195.     # ?????????? ??????, ?? ? ???????? ???????? ??????????? ??????? ??????.
  196.     query = part_query + "SET forum_topics_approved = %s WHERE forum_id = %s"
  197.     data = (count_post, forum_id)
  198.     cursor.execute(query, data)
  199.     # ????????? ?????? ? ??????? forums_track. ??? ???????? ?????????? ? ???,
  200.     # ??? ???????????? ??????? ????????? ? ???????????? ?????, ???
  201.     # ????????????? ??????. ? ??????? ???? ??????, ??????? ??????? ?? ?????.
  202.     part_query = "UPDATE " + phpbb_prefix + "forums_track "
  203.     query = part_query + "SET user_id = %s"
  204.     data = (user_id,)
  205.     cursor.execute(query, data)
  206.     query = part_query + "SET forum_id = %s"
  207.     data = (forum_id,)
  208.     cursor.execute(query, data)
  209.     query = part_query + "SET mark_time = %s"
  210.     data = (post_table_string["time_post"],)
  211.     cursor.execute(query, data)
  212.     cnx.commit()
  213.     cursor.close()
  214.     cnx.close()
  215.     topic_id += 1
  216.     post_id += 1
  217.     count_post += 1
  218.  
  219.  
  220. def create_magnet_dht(torrent_hash):
  221.     """ ??????? ??????? ?????? DHT ?????? ???? ??? ??????? ?????????. """
  222.     part_begin = "magnet:?xt=urn:btih:"
  223.     magnet_link = part_begin + torrent_hash
  224.     return magnet_link
  225.  
  226.  
  227. def create_magnet_rutracker(torrent_hash):
  228.     """ ??????? ??????? ?????? ???? ??? ??????? ?????????, ??????? ????? ?????
  229.    ? ?????????? ???????? bt*.t-ru.org. """
  230.     part_begin = "magnet:?xt=urn:btih:"
  231.     part_end = [
  232.         "&tr=http%3A%2F%2Fbt.t-ru.org%2Fann%3Fmagnet",
  233.         "&tr=http%3A%2F%2Fbt2.t-ru.org%2Fann%3Fmagnet",
  234.         "&tr=http%3A%2F%2Fbt3.t-ru.org%2Fann%3Fmagnet",
  235.         "&tr=http%3A%2F%2Fbt4.t-ru.org%2Fann%3Fmagnet"
  236.     ]
  237.     number_element = random.randrange(0, 4, 1)
  238.     magnet_link = part_begin + torrent_hash + part_end[number_element]
  239.     return magnet_link
  240.  
  241.  
  242. def post_hash(string):
  243.     """ ??????? ???????????? ? ?????????? ???-????? md5 ?????????? ??????. """
  244.     string = string.encode('utf-8')
  245.     hash_string = hashlib.md5(string).hexdigest()
  246.     return hash_string
  247.  
  248.  
  249. def random_id():
  250.     """ ??????? ?????????? ????????? ?????-????????? ?????????????. """
  251.     symbols_list = "qwertyuiopasdfghjklzxcvbnm1234567890"
  252.     count_loop = random.randrange(1, 8, 1)
  253.     # ???????????? ?????????? ???????? ?????? ????????.
  254.     for i in range(count_loop):
  255.         symbols_list = symbols_list + symbols_list
  256.     symbols_list = list(symbols_list)
  257.     random.shuffle(symbols_list)
  258.     destination_id = ''
  259.     # ?????????? ???????? ??????????????.
  260.     for i in range(8):
  261.         destination_id = destination_id + symbols_list[i]
  262.     return destination_id
  263.  
  264.  
def convert_post_text(post_text):
    """Rewrite BBCode post text into the tagged markup stored in the database.

    Every line gets "<br/>" appended, [img] tags are removed, the result is
    wrapped in <r>...</r>, and each recognised BBCode tag is replaced by an
    XML-like element in which the original tag text is kept inside
    <s>...</s> (opening tag) or <e>...</e> (closing tag) markers. This is
    presumably the storage format phpBB expects - confirm against the target
    phpBB version.

    NOTE(review): the "?-??-???" runs inside the character classes below look
    like a mis-encoded non-ASCII (probably Cyrillic) range - confirm against
    the original script before relying on these patterns.

    Args:
        post_text: Post body using BBCode tags.

    Returns:
        The converted text as a single string.
    """
    # Split the text into lines so that "<br/>" can be appended to each one;
    # the lines are glued back together in destination_string.
    post_text = post_text.splitlines()
    destination_string = ''
    i = 0
    len_post_text = len(post_text)
    while i < len_post_text:
        new_str = post_text[i] + "<br/>" + '\n'
        del post_text[i]
        post_text.insert(i, new_str)
        destination_string = destination_string + post_text[i]
        i += 1
    # From here on post_text is a single string again.
    post_text = destination_string
    # Remove [img]...[/img] tags, including the =right and =left variants.
    post_text = re.sub(
        r"(\[[iI][mM][gG]\][/\.\-:a-zA-Z?-??-???0-9_\?=#&;%\(\),+!\*<>]{1,}\[/[iI][mM][gG]\])", '',
        post_text)
    post_text = re.sub(
        r"(\[[iI][mM][gG]=[rR][iI][gG][hH][tT]\][/\.\-:a-zA-Z?-??-???0-9_\?=#&;%\(\),+!\*<>]{1,}\[/[iI][mM][gG]\])",
        '', post_text)
    post_text = re.sub(
        r"(\[[iI][mM][gG]=[lL][eE][fF][tT]\][/\.\-:a-zA-Z?-??-???0-9_\?=#&;%\(\),+!\*<>]{1,}\[/[iI][mM][gG]\])",
        '', post_text)
    # Wrap the whole text in the <r> root element.
    post_text = "<r>" + '\n' + post_text + "</r>"
    # [list] / [list=1] blocks.
    if re.search(r"(\[[lL][iI][sS][tT]=?1?\])", post_text) is not None:
        # Collect every complete [list]...[/list] fragment found in the text.
        list_value = re.findall(
            r"(\[[lL][iI][sS][tT]=?1?\][(\[\*\])a-zA-Z?-??-???0-9\.,\'\"\-\s_\?=#&<>;/:\)\(!]{1,}\[/[lL][iI][sS][tT]\])",
            post_text)
        # Drop duplicates so each distinct fragment is replaced only once.
        list_value = check_list(list_value)
        for i in list_value:
            # Extract the opening list tag (with an optional trailing <br/>),
            # strip the line break and wrap it as <LIST><s>...</s>.
            tag_begin = re.search(r"(\[[lL][iI][sS][tT]=?1?]<?[b]?[r]?/?>?)",
                                  i)
            tag_begin = tag_begin.group(0)
            tag_begin = re.sub(r"(\n?(<br/>)?)", '', tag_begin)
            tag_begin = "<LIST><s>" + tag_begin + "</s>" + "<br/>" + '\n'
            tag_end = "<e>[/list]</e></LIST>"
            # Extract the individual [*] items of this list.
            list_elements = re.findall(
                r"(\[\*\][a-zA-Z?-??-???0-9\'\"\.,\-\s_\?=#&<>;/?:\)\(!]{1,})",
                i)
            destination_value = ''
            for n in list_elements:
                # Strip line breaks and the [*] marker, then wrap the item
                # text in <LI><s>[*]</s>...</LI>.
                new_str = re.sub(r"(\n?(<br/>)?)", '', n)
                new_str = re.sub(r"(\[\*\])", '', new_str)
                new_str = "<LI><s>[*]</s>" + new_str + "</LI>" + '\n'
                destination_value = destination_value + new_str
            destination_value = tag_begin + destination_value + tag_end
            post_text = post_text.replace(i, destination_value)
    # Opening [font=...] tags.
    if re.search(r"(\[[fF][oO][nN][tT]=)", post_text) is not None:
        list_value = re.findall(
            r"(\[[fF][oO][nN][tT]=[\'\"]?[a-zA-Z?-??-???0-9^$\(\),\.\s\-_:\?/=#&!\*<>+]{1,}[\'\"]?\])",
            post_text)
        list_value = check_list(list_value)
        for i in list_value:
            # Strip the tag syntax to obtain the bare attribute value.
            destination_value = re.sub(r"(\[[fF][oO][nN][tT]=[\'\"]?)",
                                       '', i)
            destination_value = re.sub(r"([\'\"]?\])", '', destination_value)
            destination_value = '<FONT font="' + destination_value \
                                               + '"><s>' + i + "</s>"
            post_text = post_text.replace(i, destination_value)
    # Opening [color=...] tags.
    if re.search(r"(\[[cC][oO][lL][oO][rR]=)", post_text) is not None:
        list_value = re.findall(
            r"(\[[cC][oO][lL][oO][rR]=[\'\"]?[#0-9a-zA-Z]{1,}[\'\"]?\])",
            post_text)
        list_value = check_list(list_value)
        for i in list_value:
            # Strip the tag syntax to obtain the bare attribute value.
            destination_value = re.sub(r"(\[[cC][oO][lL][oO][rR]=[\'\"]?)",
                                       '', i)
            destination_value = re.sub(r"([\'\"]?\])", '', destination_value)
            destination_value = '<COLOR color="' + destination_value \
                                               + '"><s>' + i + "</s>"
            post_text = post_text.replace(i, destination_value)
    # Opening [size=...] tags.
    if re.search(r"(\[[sS][iI][zZ][eE]=)", post_text) is not None:
        list_value = re.findall(
            r"(\[[sS][iI][zZ][eE]=[\"\']?[0-9]{,3}[\"\']?\])", post_text)
        list_value = check_list(list_value)
        for i in list_value:
            # Strip the tag syntax to obtain the bare attribute value.
            destination_value = re.sub(r"(\[[sS][iI][zZ][eE]=[\'\"]?)", '', i)
            destination_value = re.sub(r"([\'\"]?\])", '', destination_value)
            destination_value = '<SIZE size="' + destination_value + \
                                             '"><s>' + i + "</s>"
            post_text = post_text.replace(i, destination_value)
    # Opening [url=...] tags.
    if re.search(r"(\[[uU][rR][lL]=)", post_text) is not None:
        list_value = re.findall(
            r"(\[[uU][rR][lL]=[\"\']?[-a-zA-Z?-??-???0-9\.\/\:\?=#&@;_%\(\),+!\*<>]{1,}[\"\']?\])",
            post_text)
        list_value = check_list(list_value)
        for i in list_value:
            # Strip the tag syntax to obtain the bare attribute value.
            destination_value = re.sub(r"(\[[uU][rR][lL]=[\'\"]?)", '', i)
            destination_value = re.sub(r"([\'\"]?\])", '', destination_value)
            destination_value = '<URL url="' + destination_value + '"><s>' + \
                                           i + "</s>"
            post_text = post_text.replace(i, destination_value)
    # Opening [spoiler="..."] tags that carry a title; the quotes are
    # required by the pattern here.
    if re.search(r"(\[[sS][pP][oO][iI][lL][eE][rR]=)",
                 post_text) is not None:
        list_value = re.findall(
            r"(\[[sS][pP][oO][iI][lL][eE][rR]=[\"\'][a-zA-Z?-??-???0-9^$\(\),\.\s\-_:\?/=#&!\*<>\[\]+]{1,}[\"\']\])",
            post_text)
        list_value = check_list(list_value)
        for i in list_value:
            # Strip the tag syntax to obtain the bare title text.
            destination_value = re.sub(r"(\[[sS][pP][oO][iI][lL][eE][rR]=[\'\"])",
                                       '', i)
            destination_value = re.sub(r"([\'\"]\])", '', destination_value)
            # The tag inside <s> is re-emitted in a normalised form:
            # lower-case name and double quotes around the title.
            destination_value = "<SPOILER spoiler=" + '"' + \
                                                    destination_value + '"' + \
                                                    '><s>' + '[spoiler="' + \
                                                    destination_value + '"' + \
                                                    ']' + "</s>"
            post_text = post_text.replace(i, destination_value)
    # Opening [align=...] tags.
    if re.search(r"(\[[aA][lL][iI][gG][nN]=)", post_text) is not None:
        list_value = re.findall(
            r"(\[[aA][lL][iI][gG][nN]=[\'\"]?[a-zA-Z]{1,}[\'\"]?\])",
            post_text)
        list_value = check_list(list_value)
        for i in list_value:
            # Strip the tag syntax to obtain the bare attribute value.
            destination_value = re.sub(r"(\[[aA][lL][iI][gG][nN]=[\'\"]?)",
                                       '', i)
            destination_value = re.sub(r"([\'\"]?\])", '', destination_value)
            # The tag inside <s> is re-emitted without quotes.
            destination_value = '<ALIGN align="' + destination_value + \
                                               '"><s>[align=' \
                                               + destination_value + \
                                               "]</s>"
            post_text = post_text.replace(i, destination_value)
    # [quote] and [/quote].
    if re.search(r"(\[[qQ][uU][oO][tT][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[[qQ][uU][oO][tT][eE]\])",
                           "<QUOTE><s>[quote]</s>", post_text)
    if re.search(r"(\[/[qQ][uU][oO][tT][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[qQ][uU][oO][tT][eE]\])",
                           "<e>[/quote]</e></QUOTE>", post_text)
    # [code] and [/code].
    if re.search(r"(\[[cC][oO][dD][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[[cC][oO][dD][eE]\])",
                           "<CODE><s>[code]</s>", post_text)
    if re.search(r"(\[/[cC][oO][dD][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[cC][oO][dD][eE]\])",
                           "<e>[/code]</e></CODE>", post_text)
    # [pre] and [/pre].
    if re.search(r"(\[[pP][rR][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[[pP][rR][eE]\])",
                           "<PRE><s>[pre]</s>", post_text)
    if re.search(r"(\[/[pP][rR][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[pP][rR][eE]\])",
                           "<e>[/pre]</e></PRE>", post_text)
    # [b] and [/b].
    if re.search(r"(\[[bB]\])", post_text) is not None:
        post_text = re.sub(r"(\[[bB]\])", "<B><s>[b]</s>", post_text)
    if re.search(r"(\[/[bB]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[bB]\])", "<e>[/b]</e></B>", post_text)
    # [i] and [/i].
    if re.search(r"(\[[iI]\])", post_text) is not None:
        post_text = re.sub(r"(\[[iI]\])", "<I><s>[i]</s>", post_text)
    if re.search(r"(\[/[iI]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[iI]\])", "<e>[/i]</e></I>", post_text)
    # [u] and [/u].
    if re.search(r"(\[[uU]\])", post_text) is not None:
        post_text = re.sub(r"(\[[uU]\])", "<U><s>[u]</s>", post_text)
    if re.search(r"(\[/[uU]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[uU]\])", "<e>[/u]</e></U>", post_text)
    # [br] has no closing tag in the input; both markers are emitted at once.
    if re.search(r"(\[[bB][rR]\])", post_text) is not None:
        post_text = re.sub(r"(\[[bB][rR]\])",
                           "<BR><s>[br]</s><e>[/br]</e></BR>", post_text)
    # [hr] is handled the same way as [br].
    if re.search(r"(\[[hH][rR]\])", post_text) is not None:
        post_text = re.sub(r"(\[[hH][rR]\])",
                           "<HR><s>[hr]</s><e>[/hr]</e></HR>", post_text)
    # Opening [spoiler] tag without a title.
    if re.search(r"(\[[sS][pP][oO][iI][lL][eE][rR]\])", post_text) \
       is not None:
        post_text = re.sub(r"(\[[sS][pP][oO][iI][lL][eE][rR]\])",
                           "<SPOILER><s>[spoiler]</s>", post_text)
    # Closing [/spoiler] tag.
    if re.search(r"(\[/[sS][pP][oO][iI][lL][eE][rR]\])", post_text) \
       is not None:
        post_text = re.sub(r"(\[/[sS][pP][oO][iI][lL][eE][rR]\])",
                           "<e>[/spoiler]</e></SPOILER>", post_text)
    # Closing [/size] tag.
    if re.search(r"(\[/[sS][iI][zZ][eE]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[sS][iI][zZ][eE]\])",
                           "<e>[/size]</e></SIZE>", post_text)
    # Closing [/color] tag.
    if re.search(r"(\[/[cC][oO][lL][oO][rR]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[cC][oO][lL][oO][rR]\])",
                           "<e>[/color]</e></COLOR>", post_text)
    # Closing [/url] tag.
    if re.search(r"(\[/[uU][rR][lL]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[uU][rR][lL]\])",
                           "<e>[/url]</e></URL>", post_text)
    # Closing [/align] tag.
    if re.search(r"(\[/[aA][lL][iI][gG][nN]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[aA][lL][iI][gG][nN]\])",
                           "<e>[/align]</e></ALIGN>", post_text)
    # Closing [/font] tag.
    if re.search(r"(\[/[fF][oO][nN][tT]\])", post_text) is not None:
        post_text = re.sub(r"(\[/[fF][oO][nN][tT]\])",
                           "<e>[/font]</e></FONT>", post_text)
    return post_text
  477.  
  478.  
  479. def check_list(list_value):
  480.     """ ????? ???????????? ?????? ?????????? ??? ?????? ?????, ??????, ????? ?
  481.    ??? ???? ????????????? ??????, ????? ?????? ????? ????????? ? ???
  482.    ??????????. ??? ??????? ??????? ??????????. """
  483.     current_list = []
  484.     n = 0
  485.     len_list_value = len(list_value)
  486.     while n < len_list_value:
  487.         flag = False
  488.         for i in current_list:
  489.             if list_value[n] == i:
  490.                 flag = True
  491.         if flag is False:
  492.             current_list.append(list_value[n])
  493.         n += 1
  494.     return current_list
  495.  
  496.  
  497. def main():
  498.     # ?????? ?????? ? ????????? ????????????? ? ???? ????????? xml ????,
  499.     # ??????? ???????? ?????? ??? ? ??????? BeautifulSoup.
  500.     first_xml_string = '<?xml version="1.0" encoding="UTF-8"?>' + '\n'
  501.     other_xml_string = "<torrents>" + '\n'
  502.     last_xml_string = "</torrents>" + '\n'
  503.     line_xml = ''
  504.     fd = open(backup_xml, "r")
  505.     for line in fd:
  506.         if line.find("<torrent id") != -1:
  507.             line_xml = first_xml_string + other_xml_string
  508.         line_xml = line_xml + line
  509.         if line.find("</torrent") != -1:
  510.             line_xml = line_xml + last_xml_string
  511.             post_table_string = parse_torrent(line_xml)
  512.             add_post_to_base(post_table_string)
  513.     fd.close()
  514.  
  515.  
# Run the import only when the file is executed as a script.
if __name__ == "__main__":
    main()
  518.  
View raw paste Reply