1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135 | === modified file 'md_importer/importer/article.py'
--- md_importer/importer/article.py 2016-01-12 11:44:04 +0000
+++ md_importer/importer/article.py 2016-01-12 15:25:45 +0000
@@ -92,13 +92,17 @@
def replace_links(self, titles, url_map):
soup = BeautifulSoup(self.html, 'html5lib')
+ change = False
for link in soup.find_all('a'):
if not link.has_attr('class') or \
'headeranchor-link' not in link.attrs['class']:
for title in titles:
- if title.endswith(link.attrs['href']):
+ if title.endswith(link.attrs['href']) and \
+ link.attrs['href'] != url_map[title].full_url:
link.attrs['href'] = url_map[title].full_url
- self.html = soup.prettify()
+ change = True
+ if change:
+ self.html = soup.prettify()
def add_to_db(self):
'''Publishes pages in their branch alias namespace.'''
@@ -111,8 +115,10 @@
return True
def publish(self):
- self.page.publish(DEFAULT_LANG)
- self.page = self.page.get_public_object()
+ if self.page.publisher_is_draft == True or \
+ self.page.is_dirty(DEFAULT_LANG):
+ self.page.publish(DEFAULT_LANG)
+ self.page = self.page.get_public_object()
return self.page
=== modified file 'md_importer/importer/publish.py'
--- md_importer/importer/publish.py 2016-01-11 13:58:34 +0000
+++ md_importer/importer/publish.py 2016-01-12 15:49:48 +0000
@@ -2,6 +2,7 @@
from cms.api import create_page, add_plugin
from cms.models import Title
+from djangocms_text_ckeditor.html import clean_html
import logging
import re
@@ -38,18 +39,26 @@
path__regex=full_url).filter(publisher_is_draft=True)
if pages:
page = pages[0].page
- page.title = title
- page.menu_title = menu_title
- page.in_navigation = in_navigation
- page.redirect = redirect
+ if page.get_title() != title:
+ page.title = title
+ if page.get_menu_title() != menu_title:
+ page.menu_title = menu_title
+ if page.in_navigation != in_navigation:
+ page.in_navigation = in_navigation
+ if page.get_redirect() != redirect:
+ page.redirect = redirect
if html:
# We create the page, so we know there's just one placeholder
placeholder = page.placeholders.all()[0]
if placeholder.get_plugins():
plugin = placeholder.get_plugins()[0].get_plugin_instance()[0]
- plugin.body = html
- plugin.save()
+ print('checking if update is necessary', full_url, plugin.changed_date)
+ if plugin.body != clean_html(html, full=False):
+ print('updating')
+ plugin.body = html
+ plugin.save()
else:
+ print('adding new plugin')
add_plugin(
placeholder, 'RawHtmlPlugin',
DEFAULT_LANG, body=html)
@@ -65,4 +74,7 @@
if html:
placeholder = page.placeholders.get()
add_plugin(placeholder, 'RawHtmlPlugin', DEFAULT_LANG, body=html)
+ placeholder = page.placeholders.all()[0]
+ plugin = placeholder.get_plugins()[0].get_plugin_instance()[0]
+ print('creating page', full_url, plugin.changed_date)
return page
=== modified file 'md_importer/tests/test_branch_import.py'
--- md_importer/tests/test_branch_import.py 2016-01-12 14:24:01 +0000
+++ md_importer/tests/test_branch_import.py 2016-01-12 15:26:49 +0000
@@ -1,6 +1,8 @@
+from datetime import datetime
+import pytz
import shutil
-from cms.models import Page
+from cms.models import CMSPlugin, Page
from md_importer.importer.article import Article
from .utils import TestLocalBranchImport
@@ -86,3 +88,31 @@
self.assertEqual(
Page.objects.filter(publisher_is_draft=False).count(),
len(self.repo.imported_articles)+1) # articles + root
+
+
+class TestTwiceImportNoHtmlChange(TestLocalBranchImport):
+ '''Run import on the same contents twice, make sure we don't
+ update the HTML in the pages over and over again.'''
+ def runTest(self):
+ self.create_repo('data/snapcraft-test')
+ self.repo.add_directive('docs', '')
+ self.assertTrue(self.repo.execute_import_directives())
+ self.assertTrue(self.repo.publish())
+ self.assertEqual(
+ Page.objects.filter(publisher_is_draft=False).count(),
+ len(self.repo.imported_articles)+1) # articles + root
+ # Take the time before publishing the second import
+ now = datetime.now(pytz.utc)
+ shutil.rmtree(self.tempdir)
+ # Run second import
+ self.create_repo('data/snapcraft-test')
+ self.repo.add_directive('docs', '')
+ self.assertEqual(len(self.repo.directives), 1)
+ self.assertEqual(len(self.repo.imported_articles), 0)
+ self.assertTrue(self.repo.execute_import_directives())
+ self.assertTrue(self.repo.publish())
+ # Check the page's plugins
+ for plugin_change in CMSPlugin.objects.filter(
+ plugin_type='RawHtmlPlugin').order_by(
+ '-changed_date'):
+ self.assertGreater(now, plugin_change.changed_date)
|