From ba8abb305a9acae086637d1a0021d51a1b8010ec Mon Sep 17 00:00:00 2001 From: Boussard Date: Wed, 23 Feb 2011 15:44:56 +0100 Subject: [PATCH 01/29] 1 - Add possibility to configure export via ini file. This file is located in $INSTANCE_HOME and is name jsonmigrator.ini. You can configure the mapping of class 2 - Add new options : MAX_CACHE_DB : purge zodb cache every x item treat (avoid memory error on big Data.fs) JUST_TREAT_WAPPER : just treat classname that are mapping on a wrapper 3 - Add logging support 4 - Add Article322Wrapper -> for old plone article --- export_scripts/plone2.0_export.py | 437 +++++++++++++++++++++++------- 1 file changed, 339 insertions(+), 98 deletions(-) diff --git a/export_scripts/plone2.0_export.py b/export_scripts/plone2.0_export.py index a5d3887..0d12817 100644 --- a/export_scripts/plone2.0_export.py +++ b/export_scripts/plone2.0_export.py @@ -10,30 +10,134 @@ import os import shutil +import ConfigParser +### DEPENDENCY 2.0.0 for python2.3 import simplejson + from datetime import datetime from Acquisition import aq_base from Products.CMFCore.utils import getToolByName +from App.config import getConfiguration +CONFIG = ConfigParser.SafeConfigParser() +CONFIG.optionxform = str +import logging +logger = logging.getLogger('plone20_export') + +try: + #import pdb;pdb.set_trace(); + CONFIG.readfp(open(os.path.join(getConfiguration().instancehome, + 'jsonmigrator.ini'))) +except: + logger.exception() + logger.warning('Please specify ini file jsonmigrator.ini in your %s' \ + % getConfiguration().instancehome) COUNTER = 1 -HOMEDIR = '/Users/rok/Projects/yaco/unex_exported_data' -CLASSNAME_TO_SKIP_LAUD = ['ControllerPythonScript', - 'ControllerPageTemplate', 'ControllerValidator', 'PythonScript', 'SQL', 'Connection', - 'ZetadbScript', 'ExternalMethod', 'ZetadbSqlInsert', 'ZetadbMysqlda', 'SiteRoot', - 'ZetadbApplication', 'ZetadbZptInsert', 'I18NLayer', 'ZetadbZptView', 'BrowserIdManager', - 'ZetadbScriptSelectMaster', 'ZetadbSqlSelect', ] -CLASSNAME_TO_SKIP = ['CatalogTool', 'MemberDataTool', 'SkinsTool', 'TypesTool', - 'UndoTool', 'URLTool', 'WorkflowTool', 'DiscussionTool', 'MembershipTool', - 'RegistrationTool', 'PropertiesTool', 'MetadataTool', 'SyndicationTool', - 'PloneTool', 'NavigationTool', 'FactoryTool', 'FormTool', 'MigrationTool', - 'CalendarTool', 'QuickInstallerTool', 'GroupsTool', 'GroupDataTool', 'MailHost', - 'CookieCrumbler', 'ContentTypeRegistry', 'GroupUserFolder', 'CachingPolicyManager', - 'InterfaceTool', 'PloneControlPanel', 'FormController', 'SiteErrorLog', 'SinTool', - 'ArchetypeTool', 'RAMCacheManager', 'PloneArticleTool', 'SyndicationInformation', - 'ActionIconsTool', 'AcceleratedHTTPCacheManager', 'ActionsTool', 'UIDCatalog', - 'ReferenceCatalog', 'ContentPanelsTool', 'MimeTypesRegistry', 'LanguageTool', - 'TransformTool'] -ID_TO_SKIP = ['Members', ] + +############## Move configuration to jsonmigrator.ini +############## in DEFAULT section specify +############## - CLASSNAME_TO_SKIP_LAUD (list separated by CARRIAGE_RETURN) +############## - CLASSNAME_TO_SKIP (list separated by CARRIAGE_RETURN) + +def getconf(option, default): + global CONFIG + if not CONFIG.has_option('DEFAULT', option): + return default + else: + return CONFIG.get('DEFAULT', option) + + + +HOMEDIR = getconf('HOMEDIR', + '/Users/rok/Projects/yaco/unex_exported_data') +logger.info("HOMEDIR : %s" % HOMEDIR) + +CLASSNAME_TO_SKIP_LAUD = [x.strip() for x \ + in getconf('CLASSNAME_TO_SKIP_LAUD', + """ControllerPythonScript + ControllerPageTemplate + ControllerValidator + 
PythonScript + SQL + Connection + ZetadbScript + ExternalMethod + ZetadbSqlInsert + ZetadbMysqlda + SiteRoot + ZetadbApplication + ZetadbZptInsert + I18NLayer + ZetadbZptView + BrowserIdManager + ZetadbScriptSelectMaster + ZetadbSqlSelect""").splitlines()] + +CLASSNAME_TO_SKIP = [x.strip() for x \ + in getconf('CLASSNAME_TO_SKIP', + """CatalogTool + MemberDataTool + SkinsTool + TypesTool + UndoTool + URLTool + WorkflowTool + DiscussionTool + MembershipTool + RegistrationTool + PropertiesTool + MetadataTool + SyndicationTool + PloneTool + NavigationTool + FactoryTool + FormTool + MigrationTool + CalendarTool + QuickInstallerTool + GroupsTool + GroupDataTool + MailHost + CookieCrumbler + ContentTypeRegistry + GroupUserFolder + CachingPolicyManager + InterfaceTool + PloneControlPanel + FormController + SiteErrorLog + SinTool + ArchetypeTool + RAMCacheManager + PloneArticleTool + SyndicationInformation + ActionIconsTool + AcceleratedHTTPCacheManager + ActionsTool + UIDCatalog + ReferenceCatalog + ContentPanelsTool + MimeTypesRegistry + LanguageTool + TransformTool""").splitlines()] + +ID_TO_SKIP = [x.strip() for x \ + in getconf('ID_TO_SKIP', + """Members""").splitlines()] +NON_FOLDERISH_CLASSNAME = [x.strip() for x \ + in getconf('NON_FOLDERISH_CLASSNAME', + """PloneArticle""").splitlines()] +JUST_TREAT_WAPPER = False +try: + JUST_TREAT_WAPPER = eval(getconf('JUST_TREAT_WAPPER',False)) +except: + JUST_TREAT_WAPPER = False +print 'ID_TO_SKIP %s ' % str(ID_TO_SKIP) + +try: + MAX_CACHE_DB = int(getconf('MAX_CACHE_DB', 500)) +except: + MAX_CACHE_DB = 500 def export_plone20(self): @@ -44,7 +148,9 @@ def export_plone20(self): COUNTER = 1 TODAY = datetime.today() - TMPDIR = HOMEDIR+'/content_'+self.getId()+'_'+TODAY.strftime('%Y-%m-%d-%H-%M-%S') + TMPDIR = os.path.join(HOMEDIR,'content_%s_%s' % \ + (self.getId(), + TODAY.strftime('%Y-%m-%d-%H-%M-%S'))) id_to_skip = self.REQUEST.get('id_to_skip', None) if id_to_skip is not None: @@ -54,7 +160,7 @@ def export_plone20(self): shutil.rmtree(TMPDIR) else: os.mkdir(TMPDIR) - + write(walk(self)) # TODO: we should return something more useful @@ -62,39 +168,55 @@ def export_plone20(self): def walk(folder): + global COUNTER for item_id in folder.objectIds(): item = folder[item_id] if item.__class__.__name__ in CLASSNAME_TO_SKIP or \ - item.getId() in ID_TO_SKIP: - continue - if item.__class__.__name__ in CLASSNAME_TO_SKIP_LAUD: - print '>> SKIPPING :: ['+item.__class__.__name__+'] '+item.absolute_url() + item.getId() in ID_TO_SKIP or (JUST_TREAT_WAPPER and \ + item.__class__.__name__\ + not in CLASSNAME_TO_WAPPER_MAP) or \ + (item.__class__.__name__ in CLASSNAME_TO_SKIP_LAUD): + logger.info('>> SKIPPING :: ['+item.__class__.__name__+'] '\ + + item.absolute_url()) continue + logger.info('>> TREAT :: ('+ str(COUNTER) +')['+item.__class__.__name__+'] '\ + + item.absolute_url()) yield item if getattr(item, 'objectIds', None) and \ - item.objectIds(): + item.objectIds() and \ + item.__class__.__name__ not in NON_FOLDERISH_CLASSNAME: for subitem in walk(item): yield subitem def write(items): + global COUNTER for item in items: - if item.__class__.__name__ not in CLASSNAME_TO_WAPPER_MAP.keys(): + if item.__class__.__name__\ + not in CLASSNAME_TO_WAPPER_MAP.keys(): import pdb; pdb.set_trace() raise Exception, 'No wrapper defined for "'+item.__class__.__name__+ \ '" ('+item.absolute_url()+').' 
try: + dictionary = CLASSNAME_TO_WAPPER_MAP[item.__class__.__name__](item) write_to_jsonfile(dictionary) COUNTER += 1 + if (COUNTER % MAX_CACHE_DB)==0: + logger.info('Purge ZODB cache') + [item.Control_Panel.Database[x]._getDB().cacheMinimize() \ + for x in item.Control_Panel.Database.getDatabaseNames()] except: - import pdb; pdb.set_trace() + print "there is an error on %s" % item.absolute_url() + import pdb;pdb.set_trace(); + raise def write_to_jsonfile(item): global COUNTER + SUB_TMPDIR = os.path.join(TMPDIR, str(COUNTER/1000)) # 1000 files per folder, so we dont reach some fs limit if not os.path.isdir(SUB_TMPDIR): @@ -130,9 +252,12 @@ def write_to_jsonfile(item): item2['attachedImage'][0] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) f.close() datafield_counter += 1 - + f = open(os.path.join(SUB_TMPDIR, str(COUNTER)+'.json'), 'wb') - simplejson.dump(item, f, indent=4) + try: + simplejson.dump(item, f, indent=4) + except: + import pdb;pdb.set_trace(); f.close() @@ -142,6 +267,16 @@ def getPermissionMapping(acperm): result[entry[0]] = entry[1] return result +def safe_decode(s, charset, errors): + if type(s) is type(u''): + return s + if hasattr(s, 'decode'): + return s.decode(charset, errors) + + if s.__class__.__name__ == 'BaseUnit': + return str(s).decode(charset, errors) + else: + return s class BaseWrapper(dict): """Wraps the dublin core metadata and pass it as tranmogrifier friendly style @@ -163,17 +298,17 @@ def __init__(self, obj): self['_type'] = self.obj.__class__.__name__ self['id'] = obj.getId() - self['title'] = obj.title.decode(self.charset, 'ignore') - self['description'] = obj.description.decode(self.charset, 'ignore') + self['title'] = safe_decode(obj.title,self.charset, 'ignore') + self['description'] = safe_decode(obj.description,self.charset, 'ignore') self['language'] = obj.language - self['rights'] = obj.rights.decode(self.charset, 'ignore') + self['rights'] = safe_decode(obj.rights,self.charset, 'ignore') # for DC attrs that are tuples for attr in ('subject', 'contributors'): self[attr] = [] val_tuple = getattr(obj, attr, False) if val_tuple: for val in val_tuple: - self[attr].append(val.decode(self.charset, 'ignore')) + self[attr].append(safe_decode(val,self.charset, 'ignore')) self[attr] = tuple(self[attr]) # for DC attrs that are DateTimes datetimes_dict = {'creation_date': 'creation_date', @@ -192,16 +327,27 @@ def __init__(self, obj): for w in workflow_history: for i, w2 in enumerate(workflow_history[w]): workflow_history[w][i]['time'] = str(workflow_history[w][i]['time']) - workflow_history[w][i]['comments'] = workflow_history[w][i]['comments'].decode(self.charset, 'ignore') + workflow_history[w][i]['comments'] = safe_decode(workflow_history[w][i]['comments'],self.charset, 'ignore') except: import pdb; pdb.set_trace() self['_workflow_history'] = workflow_history # default view - _browser = '/'.join(self.portal_utils.browserDefault(aq_base(obj))[1]) - if _browser not in ['folder_listing']: - self['_layout'] = '' - self['_defaultpage'] = _browser + if 'layout' in obj.__dict__: + self['_layout'] = obj.__dict__['layout'] + try: + _browser = self.portal_utils.browserDefault(aq_base(obj))[1] + except: + _browser = None + if _browser: + ## _browser can be value [None] + try: + _browser = '/'.join(_browser) + except: + _browser = '' + if _browser not in ['folder_listing']: + self['_layout'] = '' + self['_defaultpage'] = _browser #elif obj.getId() != 'index_html': # self['_layout'] = _browser # self['_defaultpage'] = '' @@ 
-219,7 +365,7 @@ def __init__(self, obj): if typ == 'string': if getattr(val, 'decode', False): try: - val = val.decode(self.charset, 'ignore') + val = safe_decode(val,self.charset, 'ignore') except UnicodeEncodeError: val = unicode(val) else: @@ -281,14 +427,15 @@ def decode(self, s, encodings=('utf8', 'latin1', 'ascii')): return s.decode(encoding) except UnicodeDecodeError: pass - return s.decode(test_encodings[0], 'ignore') + return safe_decode(s,test_encodings[0], 'ignore') class DocumentWrapper(BaseWrapper): def __init__(self, obj): super(DocumentWrapper, self).__init__(obj) - self['text'] = obj.text.decode(self.charset, 'ignore') + if hasattr(obj, 'text'): + self['text'] = safe_decode(obj.text,self.charset, 'ignore') class I18NFolderWrapper(BaseWrapper): @@ -299,10 +446,10 @@ def __init__(self, obj): lang = obj.getDefaultLanguage() data = obj.folder_languages.get(lang, None) if data is not None: - self['title'] = data['title'].decode(self.charset, 'ignore') - self['description'] = data['description'].decode(self.charset, 'ignore') + self['title'] = safe_decode(data['title'],self.charset, 'ignore') + self['description'] = safe_decode(data['description'],self.charset, 'ignore') else: - print 'ERROR: Cannot get default data for I18NFolder "%s"' % self['_path'] + logger.error('ERROR: Cannot get default data for I18NFolder "%s"' % self['_path']) # delete empty title in properties for prop in self['_properties']: @@ -316,7 +463,7 @@ def __init__(self, obj): data = obj.folder_languages[lang] for field in data: self['_properties'].append(['%s_%s' % (lang, field), - data[field].decode(self.charset, 'ignore'), + safe_decode(data[field],self.charset, 'ignore'), 'text']) @@ -324,7 +471,7 @@ class LinkWrapper(BaseWrapper): def __init__(self, obj): super(LinkWrapper, self).__init__(obj) - self['remoteUrl'] = obj.remote_url + self['remoteUrl'] = obj.remote_url() class NewsItemWrapper(DocumentWrapper): @@ -380,6 +527,7 @@ def __init__(self, obj): self['_datafield_file'] = data + class ImageWrapper(BaseWrapper): def __init__(self, obj): @@ -397,18 +545,19 @@ def __init__(self, obj): super(EventWrapper, self).__init__(obj) self['startDate'] = str(obj.start_date) self['endDate'] = str(obj.end_date) - self['location'] = obj.location.decode(self.charset, 'ignore') - self['contactName'] = obj.contact_name.decode(self.charset, 'ignore') - self['contactEmail'] = obj.contact_email - self['contactPhone'] = obj.contact_phone - self['eventUrl'] = obj.event_url + self['location'] = safe_decode(obj.location,self.charset, 'ignore') + self['contactName'] = safe_decode(obj.contact_name(),self.charset, 'ignore') + self['contactEmail'] = obj.contact_email() + self['contactPhone'] = obj.contact_phone() + self['eventUrl'] = obj.event_url() class ArchetypesWrapper(BaseWrapper): def __init__(self, obj): + super(ArchetypesWrapper, self).__init__(obj) - + fields = obj.schema.fields() for field in fields: type_ = field.__class__.__name__ @@ -443,22 +592,23 @@ def __init__(self, obj): self[unicode(field.__name__)] = ['/'+i.absolute_url() for i in value] else: self[unicode(field.__name__)] = value.absolute_url() - elif type_ in ['ImageField', 'FileField']: + elif type_ in ['ImageField', 'FileField', 'AttachmentField']: fieldname = unicode('_data_'+field.__name__) value = field.get(obj) value2 = value if type(value) is not str: value = str(value.data) if value: - size = value2.getSize() + self['__datafields__'].append(fieldname) - self[fieldname] = { - 'data': value, - 'size': size, } + self[fieldname] = value + elif type_ in 
['ComputedField']: pass + else: - raise 'Unknown field type for ArchetypesWrapper.' + + raise 'Unknown field type for ArchetypesWrapper : %s' % type_ def _guessFilename(self, data, fname='', mimetype='', default=''): """ @@ -487,12 +637,12 @@ def __init__(self, obj): super(I18NLayerWrapper, self).__init__(obj) lang = obj.portal_properties.site_properties.default_language if lang not in obj.objectIds(): - print 'ERROR: Cannot get default data for I18NLayer "%s"' % self['_path'] + logger.error('ERROR: Cannot get default data for I18NLayer "%s"' % self['_path']) else: real = obj[lang] - self['title'] = real.title.decode(self.charset, 'ignore') - self['description'] = real.description.decode(self.charset, 'ignore') - self['text'] = real.text.decode(self.charset, 'ignore') + self['title'] = safe_decode(real.title,self.charset, 'ignore') + self['description'] = safe_decode(real.description,self.charset, 'ignore') + self['text'] = safe_decode(real.text,self.charset, 'ignore') # Not lose information: generate properites es_title, en_title, etc. # TODO: Export all archetypes, but I don't need now, only document important fields @@ -502,13 +652,90 @@ def __init__(self, obj): text = content.text) for field in data: self['_properties'].append(['%s_%s' % (lang, field), - data[field].decode(self.charset, 'ignore'), + safe_decode(data[field],self.charset, 'ignore'), 'text']) +def generateUniqueId(type_name=None): + """ + Generate an id for the content + This is not the archetype's uid. + """ + from DateTime import DateTime + from random import random + + now = DateTime() + time = '%s.%s' % (now.strftime('%Y-%m-%d'), str(now.millis())[7:]) + rand = str(random())[2:6] + prefix = '' + suffix = '' + + if type_name is not None: + prefix = type_name.replace(' ', '_') + '.' + prefix = prefix.lower() + + return prefix + time + rand + suffix + +class Article322Wrapper(NewsItemWrapper): + + def __init__(self, obj): + super(Article322Wrapper, self).__init__(obj) + #(Pdb) self.__ordered_attachment_refs__.getItems() + #['4e952a8c3af4b1bcedf38d475ac6049d'] + d = {'__ordered_attachment_refs__' : ('_plonearticle_attachments', + 'FileProxy', + 'attachedFile', + 'getFile'), + '__ordered_image_refs__' : ('_plonearticle_images', + 'ImageProxy', + 'attachedImage', + 'getImage'), + + '__ordered_link_refs__' : ('_plonearticle_refs', + 'LinkProxy', + 'attachedLink', + 'getRemoteUrl')} + + ids = obj.objectIds() + for x in d: + slot_name = d[x][0] + id_name = d[x][1] + field_name = d[x][2] + accessor = d[x][3] + setattr(self, slot_name, []) + for refid in getattr(obj,x).getItems(): + ref = None + try: + ref = getattr(obj.at_references, refid).getTargetObject() + except: + ## ghost ref + logger.exception("Attribut rror during migration on %s"\ + % str(obj)) + continue ## just ignore it... 
+ inner = { + 'id': (generateUniqueId(id_name), {}), + 'title': (safe_decode(ref.Title(), + self.charset, 'ignore'), {}), + 'description': (safe_decode(ref.Description(), + self.charset, + 'ignore'), {}),} + if ref.id in ids: + ### internal + inner[field_name] = (getattr(ref, accessor)(), {}) + else: + #### external + inner['referencedContent'] = (ref.UID(), {}) + getattr(self, slot_name).append(inner) + + + + + + class ArticleWrapper(NewsItemWrapper): def __init__(self, obj): + super(ArticleWrapper, self).__init__(obj) try: self['cooked_text'] = obj.cooked_text.decode(self.charset) @@ -520,8 +747,8 @@ def __init__(self, obj): item = obj[item_id] plonearticle_attachments.append({ 'id': (item_id, {}), - 'title': (item.title.decode(self.charset, 'ignore'), {}), - 'description': (item.description.decode(self.charset, 'ignore'), {}), + 'title': (safe_decode(item.title, self.charset, 'ignore'), {}), + 'description': (safe_decode(item.description, self.charset, 'ignore'), {}), 'attachedFile': [item.getFile(), {}], }) self['_plonearticle_attachments'] = plonearticle_attachments @@ -531,8 +758,8 @@ def __init__(self, obj): item = obj[item_id] plonearticle_images.append({ 'id': (item_id, {}), - 'title': (item.title.decode(self.charset, 'ignore'), {}), - 'description': (item.description.decode(self.charset, 'ignore'), {}), + 'title': (safe_decode(item.title, self.charset, 'ignore'), {}), + 'description': (safe_decode(self.charset, 'ignore'), {}), 'attachedImage': [str(item.data), {}], }) self['_plonearticle_images'] = plonearticle_images @@ -624,34 +851,48 @@ def __init__(self, obj): # self['__datafields__'].append('document_src') # TODO: should be also possible to set it with through parameters -CLASSNAME_TO_WAPPER_MAP = { - 'LargePloneFolder': BaseWrapper, - 'Folder': BaseWrapper, - 'PloneSite': BaseWrapper, - 'PloneFolder': BaseWrapper, - 'Document': DocumentWrapper, - 'File': FileWrapper, - 'Image': ImageWrapper, - 'Link': LinkWrapper, - 'Event': EventWrapper, - 'NewsItem': NewsItemWrapper, - 'Favorite': LinkWrapper, - 'Topic': BaseWrapper, - 'ListCriterion': ListCriteriaWrapper, - 'SimpleStringCriterion': StringCriteriaWrapper, - 'SortCriterion': SortCriteriaWrapper, - 'FriendlyDateCriterion': DateCriteriaWrapper, - - # custom ones - 'I18NFolder': I18NFolderWrapper, - 'I18NLayer': I18NLayerWrapper, - 'PloneArticle': ArticleWrapper, - 'ZPhotoSlides': ZPhotoSlidesWrapper, - 'ZPhoto': ZPhotoWrapper, - 'PloneLocalFolderNG': ArchetypesWrapper, - 'LocalFS': LocalFSWrapper, - 'ContentPanels': ContentPanels, - 'DTMLMethod': ZopeObjectWrapper, - 'ZopePageTemplate': ZopeObjectWrapper, - -} +CLASSNAME_TO_WAPPER_MAP = {} +if CONFIG.has_section('CLASSNAME_TO_WAPPER_MAP'): + for x in CONFIG.items('CLASSNAME_TO_WAPPER_MAP'): + + try: + CLASSNAME_TO_WAPPER_MAP[x[0]] = eval(x[1].strip()) + logger.debug("map %s to %s" % (x[0], x[1]) ) + except: + logger.info("cant add class for mapping %s" % x[0]) + pass +else: + print "load default CLASSNAME_TO_WAPPER_MAP" + CLASSNAME_TO_WAPPER_MAP = { + 'LargePloneFolder': BaseWrapper, + 'Folder': BaseWrapper, + 'PloneSite': BaseWrapper, + 'PloneFolder': BaseWrapper, + 'Document': DocumentWrapper, + 'File': FileWrapper, + 'Image': ImageWrapper, + 'Link': LinkWrapper, + 'Event': EventWrapper, + 'NewsItem': NewsItemWrapper, + 'Favorite': LinkWrapper, + 'Topic': BaseWrapper, + 'ListCriterion': ListCriteriaWrapper, + 'SimpleStringCriterion': StringCriteriaWrapper, + 'SortCriterion': SortCriteriaWrapper, + 'FriendlyDateCriterion': DateCriteriaWrapper, + + # custom ones + 
'I18NFolder': I18NFolderWrapper,
+        'I18NLayer': I18NLayerWrapper,
+        'PloneArticle': ArticleWrapper,
+        'ZPhotoSlides': ZPhotoSlidesWrapper,
+        'ZPhoto': ZPhotoWrapper,
+        'PloneLocalFolderNG': ArchetypesWrapper,
+        'LocalFS': LocalFSWrapper,
+        'ContentPanels': ContentPanels,
+        'DTMLMethod': ZopeObjectWrapper,
+        'ZopePageTemplate': ZopeObjectWrapper,
+
+    }
+
+
From 4db2f823d7daffd6ed78cda4c982cd3892aebec4 Mon Sep 17 00:00:00 2001
From: Boussard
Date: Thu, 24 Feb 2011 12:11:49 +0100
Subject: [PATCH 02/29] add some documentation to plone2.0_export.rst

---
 docs/plone2.0_export.rst | 181 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100644 docs/plone2.0_export.rst

diff --git a/docs/plone2.0_export.rst b/docs/plone2.0_export.rst
new file mode 100644
index 0000000..e1a2ce9
--- /dev/null
+++ b/docs/plone2.0_export.rst
@@ -0,0 +1,181 @@
+``plone2.0_export.py``
+======================
+
+Export data from an old Plone site.
+
+Installation
+------------
+
+* Create an external method in your Plone site.
+
+  - Copy collective.blueprint.jsonmigrator/export_scripts/plone2.0_export.py into the *INSTANCE/Extensions* directory
+  - Connect to the ZMI
+  - Add an External Method and fill out the form with::
+
+      id = your_id
+      module name = plone2.0_export
+      method = export_plone20
+
+* Create a jsonmigrator.ini file in $INSTANCE_HOME to configure the export process.
+
+
+Syntax of configuration
+-----------------------
+
+
+Options
++++++++
+
+  * In the DEFAULT section
+
+    - HOMEDIR => where the JSON files are written. This directory must exist! Each time the export process is invoked, a new folder is created inside it, and within that folder a new sub-folder is created for every 1000 exported objects (to avoid filesystem limits). The directory structure looks like this::
+
+        HOMEDIR
+        |_ _
+           |_ 0
+              |_ 1.json
+              |_ 2.json
+              |_ ...
+              |_ 999.json
+           |_ 1
+              |_ 1000.json
+              |_ 1001.json
+              |_ ...
+              |_ 1999.json
+        ....
+
+      You may also see files named like xxx.json-file-x; these are the binary data files of the exported content.
+
+    - CLASSNAME_TO_SKIP_LAUD => a list of class names. Objects of these classes are skipped by the export process.
+
+    - CLASSNAME_TO_SKIP => a list of class names. Objects of these classes are skipped by the export process.
+
+    - ID_TO_SKIP => a list of object ids. Objects whose id matches a member of this list are skipped by the export process.
+
+    - NON_FOLDERISH_CLASSNAME => a list of class names. Objects of these classes are treated as non-folderish content (their children are not traversed).
+
+    - JUST_TREAT_WAPPER => if True, CLASSNAME_TO_SKIP_LAUD and CLASSNAME_TO_SKIP have no effect: only objects whose class is mapped in CLASSNAME_TO_WAPPER_MAP are exported.
+
+    - MAX_CACHE_DB => an integer indicating after how many exported objects the ZODB cache is purged (to avoid memory errors on a big Data.fs).
+
+  * In the CLASSNAME_TO_WAPPER_MAP section
+
+    - ClassName=Wrapper => configures which export wrapper is used for objects of ClassName
+
+
+Example
++++++++
+
+::
+
+  [DEFAULT]
+  HOMEDIR=c:\dt\plone2.1\export
+  JUST_TREAT_WAPPER=True
+  NON_FOLDERISH_CLASSNAME=DPLDTArticle
+     DPLDTIssue
+     DPLDTPerformance
+     DPLDTTraining
+  MAX_CACHE_DB=250
+
+  [CLASSNAME_TO_WAPPER_MAP]
+  LargePloneFolder=BaseWrapper
+  Folder=BaseWrapper
+  PloneSite=BaseWrapper
+  PloneFolder=BaseWrapper
+  Document=DocumentWrapper
+  File=FileWrapper
+  YourSpecificContentType=ArchetypesWrapper
+
+
+Existing Wrapper
+++++++++++++++++
+
+  .. literalinclude:: ../export_scripts/plone2.0_export.py
+     :pyobject: BaseWrapper
+     :end-before: def
+
+
+  .. 
literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: DocumentWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: I18NFolderWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: LinkWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: NewsItemWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ListCriteriaWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: StringCriteriaWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: SortCriteriaWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: DateCriteriaWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: FileWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ImageWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: EventWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ArchetypesWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: I18NLayerWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: Article322Wrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ArticleWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ZPhotoWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ZPhotoSlidesWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ContentPanels + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: LocalFSWrapper + :end-before: def + + .. literalinclude:: ../export_scripts/plone2.0_export.py + :pyobject: ZopeObjectWrapper + :end-before: def + + + + + From 2d3c65724a3eab902f7e69175010b29ffaa54946 Mon Sep 17 00:00:00 2001 From: Boussard Date: Thu, 24 Feb 2011 12:12:05 +0100 Subject: [PATCH 03/29] add some documentation to plone2.0_export.rst --- docs/index.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 60b0499..a2ccdd4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,7 @@ List of blueprints built around ``collective.blueprints.jsonmigrator.source`` with purpose of providing flexible infrastructure to do migrations in Plone. In source of this package in ``exports_scripts`` directory is also a helping -export script ``plone2.0_export.py`` which provides a external method +export script :doc:`plone2.0_export` which provides a external method ``export_plone20`` to export data from Plone 2.0 (script might also work with higher versions of plone 2.1, 2.5, but was not tested) in format that is suitable for ``collective.blueprints.jsonmigrator.source`` blueprint. @@ -34,3 +34,4 @@ And if you might forgot, migration is a bitch ... so have fun :P .. _`collective.transmogrifier`: http://pypi.python.org/pypi/collective.transmogrifier .. 
_`Plone`: http://plone.org + From e0c3acf053c67d7ecb5b5d84f3ad34666791d490 Mon Sep 17 00:00:00 2001 From: Boussard Date: Thu, 24 Feb 2011 12:12:21 +0100 Subject: [PATCH 04/29] add some documentation to wrappers --- export_scripts/plone2.0_export.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/export_scripts/plone2.0_export.py b/export_scripts/plone2.0_export.py index 0d12817..e8260cf 100644 --- a/export_scripts/plone2.0_export.py +++ b/export_scripts/plone2.0_export.py @@ -431,6 +431,7 @@ def decode(self, s, encodings=('utf8', 'latin1', 'ascii')): class DocumentWrapper(BaseWrapper): + """ An wrapper to an Document. Object must have an text attribute """ def __init__(self, obj): super(DocumentWrapper, self).__init__(obj) @@ -439,7 +440,8 @@ def __init__(self, obj): class I18NFolderWrapper(BaseWrapper): - + """ An wrapper to an I18NFolder """ + def __init__(self, obj): super(I18NFolderWrapper, self).__init__(obj) # We are ignoring another languages @@ -468,6 +470,7 @@ def __init__(self, obj): class LinkWrapper(BaseWrapper): + """ An wrapper to ATLink """ def __init__(self, obj): super(LinkWrapper, self).__init__(obj) @@ -475,6 +478,8 @@ def __init__(self, obj): class NewsItemWrapper(DocumentWrapper): + """ An wrapper to NewsItem """ + def __init__(self, obj): super(NewsItemWrapper, self).__init__(obj) @@ -482,6 +487,7 @@ def __init__(self, obj): class ListCriteriaWrapper(BaseWrapper): + """ An wrapper to ListCriteria """ def __init__(self, obj): super(ListCriteriaWrapper, self).__init__(obj) @@ -491,6 +497,7 @@ def __init__(self, obj): class StringCriteriaWrapper(BaseWrapper): + """ An wrapper to StringCriteria """ def __init__(self, obj): super(StringCriteriaWrapper, self).__init__(obj) @@ -499,6 +506,7 @@ def __init__(self, obj): class SortCriteriaWrapper(BaseWrapper): + """ An wrapper to SortStringCriteria """ def __init__(self, obj): super(SortCriteriaWrapper, self).__init__(obj) @@ -507,6 +515,7 @@ def __init__(self, obj): class DateCriteriaWrapper(BaseWrapper): + """ An wrapper to DateCriteria """ def __init__(self, obj): super(DateCriteriaWrapper, self).__init__(obj) @@ -517,8 +526,11 @@ def __init__(self, obj): class FileWrapper(BaseWrapper): + """ An wrapper to OFSFile """ + def __init__(self, obj): + super(FileWrapper, self).__init__(obj) self['__datafields__'].append('_datafield_file') data = str(obj.data) @@ -529,8 +541,10 @@ def __init__(self, obj): class ImageWrapper(BaseWrapper): + """ An wrapper to OFSImage """ def __init__(self, obj): + super(ImageWrapper, self).__init__(obj) self['__datafields__'].append('_datafield_image') data = str(obj.data) @@ -540,6 +554,7 @@ def __init__(self, obj): class EventWrapper(BaseWrapper): + """ An wrapper to ATEvent """ def __init__(self, obj): super(EventWrapper, self).__init__(obj) @@ -553,6 +568,7 @@ def __init__(self, obj): class ArchetypesWrapper(BaseWrapper): + """ An wrapper to Archetype Object """ def __init__(self, obj): @@ -632,6 +648,7 @@ def _guessFilename(self, data, fname='', mimetype='', default=''): class I18NLayerWrapper(ArchetypesWrapper): + """ An wrapper to I18N Archetype Object """ def __init__(self, obj): super(I18NLayerWrapper, self).__init__(obj) @@ -676,6 +693,7 @@ def generateUniqueId(type_name=None): return prefix + time + rand + suffix class Article322Wrapper(NewsItemWrapper): + """ An wrapper to Old Plone Article Object (version<4)""" def __init__(self, obj): super(Article322Wrapper, self).__init__(obj) @@ -733,6 +751,7 @@ def __init__(self, obj): class 
ArticleWrapper(NewsItemWrapper): + """ An wrapper to Plone Article Object (version>=4)""" def __init__(self, obj): @@ -766,6 +785,7 @@ def __init__(self, obj): class ZPhotoWrapper(BaseWrapper): + """ An wrapper to ZPhoto """ def __init__(self, obj): super(ZPhotoWrapper, self).__init__(obj) @@ -783,6 +803,7 @@ def __init__(self, obj): class ZPhotoSlidesWrapper(BaseWrapper): + """ An wrapper to ZPhotoSlide """ def __init__(self, obj): super(ZPhotoSlidesWrapper, self).__init__(obj) @@ -830,6 +851,7 @@ def __init__(self, obj): class ContentPanels(BaseWrapper): + """ An wrapper to ControlPanel """ def __init__(self, obj): super(ContentPanels, self).__init__(obj) @@ -837,6 +859,7 @@ def __init__(self, obj): class LocalFSWrapper(BaseWrapper): + """ An wrapper to FS object """ def __init__(self, obj): super(LocalFSWrapper, self).__init__(obj) @@ -844,6 +867,7 @@ def __init__(self, obj): class ZopeObjectWrapper(BaseWrapper): + """ An wrapper to Zope object """ def __init__(self, obj): super(ZopeObjectWrapper, self).__init__(obj) From 4ea9bdb49e152a3451619900cb18c944c11177d0 Mon Sep 17 00:00:00 2001 From: Boussard Date: Mon, 28 Feb 2011 11:05:06 +0100 Subject: [PATCH 05/29] logger.exception and warnig was redondant and exception have an mandatory argument --- export_scripts/plone2.0_export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/export_scripts/plone2.0_export.py b/export_scripts/plone2.0_export.py index e8260cf..ab7050e 100644 --- a/export_scripts/plone2.0_export.py +++ b/export_scripts/plone2.0_export.py @@ -28,7 +28,6 @@ CONFIG.readfp(open(os.path.join(getConfiguration().instancehome, 'jsonmigrator.ini'))) except: - logger.exception() logger.warning('Please specify ini file jsonmigrator.ini in your %s' \ % getConfiguration().instancehome) From 6e607cf0575419fb5ca6cf9f22f8bac6ee484b9b Mon Sep 17 00:00:00 2001 From: Boussard Date: Wed, 2 Mar 2011 21:17:57 +0100 Subject: [PATCH 06/29] change the name of section --- docs/permission_mapping.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/permission_mapping.rst b/docs/permission_mapping.rst index 901f48e..77abf6a 100644 --- a/docs/permission_mapping.rst +++ b/docs/permission_mapping.rst @@ -25,7 +25,7 @@ Configuration:: ... 
- [mimetype] + [permission_mapping] blueprint = collective.blueprint.jsonmigrator.permission_mapping Data in pipeline:: From 69b9033001e947b5739edbc93e84b04a1ff2d796 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 10:02:35 +0200 Subject: [PATCH 07/29] add a new blueprint for plonearticle --- .../blueprint/jsonmigrator/blueprint.py | 199 +++++++++++++++++- 1 file changed, 190 insertions(+), 9 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 19fe7fc..0768e85 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -21,11 +21,23 @@ from Products.CMFCore.utils import getToolByName from Products.Archetypes.interfaces import IBaseObject +from Products.Archetypes.BaseUnit import BaseUnit from AccessControl.interfaces import IRoleManager +from Products.PloneArticle.interfaces import IPloneArticle + +## linguaplone migration +HAVE_LP = None +try: + from Products.LinguaPlone.interfaces import ITranslatable + HAVE_LP = True +except ImportError: + HAVE_LP = False DATAFIELD = '_datafield_' STATISTICSFIELD = '_statistics_field_prefix_' +import logging +logger = logging.getLogger('collective.blueprint.jsonmigrator') class JSONSource(object): """ """ @@ -45,6 +57,7 @@ def __init__(self, transmogrifier, name, options, previous): raise Exception, 'Path ('+str(self.path)+') does not exists.' self.datafield_prefix = options.get('datafield-prefix', DATAFIELD) + self.datafield_separator = options.get('datafield-separator', None) def __iter__(self): for item in self.previous: @@ -57,14 +70,25 @@ def __iter__(self): for item2 in sorted([int(j[:-5]) for j in os.listdir(os.path.join(self.path, str(item3))) if j.endswith('.json')]): - - f = open(os.path.join(self.path, str(item3), str(item2)+'.json')) + json_file_path = os.path.join(self.path, str(item3), + str(item2)+'.json') + f = open(json_file_path) + item = simplejson.loads(f.read()) + item['_json_file_path'] = json_file_path f.close() - for key in item.keys(): if key.startswith(self.datafield_prefix): - item[key] = os.path.join(self.path, item[key]) + + if self.datafield_separator: + + item[key]['path'] = item[key]['path'].replace(\ + self.datafield_separator, + os.path.sep) + #file_name = os.path.join(os.path.dirname(item[key][''] + # os.path.basename(item[key]['path'] + item[key]['path'] = os.path.join(self.path, + item[key]['path']) yield item @@ -480,6 +504,152 @@ def __iter__(self): yield item +class LinguaRelation(object): + """ an section about linguaplone """ + + classProvides(ISectionBlueprint) + implements(ISection) + + def __init__(self, transmogrifier, name, options, previous): + self.transmogrifier = transmogrifier + self.name = name + self.options = options + self.previous = previous + self.context = transmogrifier.context + + if 'path-key' in options: + pathkeys = options['path-key'].splitlines() + else: + pathkeys = defaultKeys(options['blueprint'], name, 'path') + self.pathkey = Matcher(*pathkeys) + + + + def __iter__(self): + for item in self.previous: + + pathkey = self.pathkey(*item.keys())[0] + + if not pathkey: # not enough info + yield item; continue + if not HAVE_LP: ## not LinguaPlone + yield item; continue + #if 'mission' in item[pathkey]: + # import pdb;pdb.set_trace(); + + obj = self.context.unrestrictedTraverse(item[pathkey].lstrip('/'), None) + if obj is None: # path doesn't exist + yield item; continue + if obj.getLanguage() != item['language']: + 
obj.setLanguage(item['language']) + + + if not ITranslatable.providedBy(obj): + yield item; continue ## not a linguaplone object + else: + canonical_path = item.get('_canonical') + language = item.get('language','') + if not canonical_path: + yield item; continue + try: + canonical = self.context.unrestrictedTraverse(canonical_path.lstrip('/'), None) + except: + yield item; continue + try: + if not canonical.hasTranslation(language): + canonical.addTranslationReference(obj) + yield item; continue + except: + yield item; continue + +class PloneArticleFields(object): + """ updata data for plonearticle fields """ + + classProvides(ISectionBlueprint) + implements(ISection) + + def __init__(self, transmogrifier, name, options, previous): + self.transmogrifier = transmogrifier + self.name = name + self.options = options + self.previous = previous + self.context = transmogrifier.context + + if 'path-key' in options: + pathkeys = options['path-key'].splitlines() + else: + pathkeys = defaultKeys(options['blueprint'], name, 'path') + self.pathkey = Matcher(*pathkeys) + self.datafield_separator = options.get('datafield-separator', None) + + def __iter__(self): + for item in self.previous: + pathkey = self.pathkey(*item.keys())[0] + if not pathkey: # not enough info + yield item; continue + #if item.get('_path') == 'direction-technique-division/administration-finance': + # import pdb;pdb.set_trace(); + + + + obj = self.context.unrestrictedTraverse(item[pathkey].lstrip('/'), None) + if obj is None: # path doesn't exist + yield item; continue + + def getUnit(x, field_name): + name = x['id'][0] + f_path = x[field_name][0]['data'] + x = x[field_name][0] + if self.datafield_separator: + f_path = f_path.replace(self.datafield_separator, + os.path.sep) + f_name = os.path.basename(f_path) + f_path = os.path.join(os.path.dirname(\ + item['_json_file_path']), + f_name) + f = open(f_path, mode = 'rb') + value = f.read() + unit = BaseUnit(name = name, + file = value, + mimetype = x.get('content_type', ''), + filename = x.get('filename', ''), + instance = obj) + f.close() + return unit + + + + if IPloneArticle.providedBy(obj): + + if '_plonearticle_images' in item and \ + len(item['_plonearticle_images']): + for (i, x) in enumerate(item['_plonearticle_images']) : + unit = getUnit(x,'attachedImage') + item['_plonearticle_images'][i]['attachedImage']=\ + (unit,{}) + obj.getField('images').set(obj,item['_plonearticle_images']) + if '_plonearticle_attachments' in item and\ + len(item['_plonearticle_attachments']): + + for (i, x) in enumerate(item['_plonearticle_attachments']): + unit = getUnit(x,'attachedFile') + item['_plonearticle_attachments'][i]['attachedFile'] =\ + (unit,{}) + obj.getField('files').set(obj, + item['_plonearticle_attachments']) + if '_plonearticle_refs' in item and \ + len(item['_plonearticle_refs']): + try: + obj.getField('links').set(obj, + item['_plonearticle_refs']) + except: + logger.error('cannot set links for %s %s' % \ + (item['_path'], + item['_json_file_path']) + ) + + + class DataFields(object): """ """ @@ -498,33 +668,44 @@ def __init__(self, transmogrifier, name, options, previous): else: pathkeys = defaultKeys(options['blueprint'], name, 'path') self.pathkey = Matcher(*pathkeys) + self.datafield_prefix = options.get('datafield-prefix', DATAFIELD) def __iter__(self): for item in self.previous: + pathkey = self.pathkey(*item.keys())[0] - if not pathkey: # not enough info yield item; continue + #if item.get('_path') == 'direction-technique-division/administration-finance': + # import 
pdb;pdb.set_trace(); + + obj = self.context.unrestrictedTraverse(item[pathkey].lstrip('/'), None) if obj is None: # path doesn't exist yield item; continue - + if IBaseObject.providedBy(obj): for key in item.keys(): if not key.startswith(self.datafield_prefix): continue - if not os.path.exists(item[key]): + if not os.path.exists(item[key].get('path','')): continue fieldname = key[len(self.datafield_prefix):] field = obj.getField(fieldname) - f = open(item[key]) + f = open(item[key]['path'],mode='rb') value = f.read() + unit = BaseUnit(name = fieldname, + file = value, + mimetype = item[key].get('content_type',''), + filename = item[key].get('filename',''), + instance = obj + ) f.close() if len(value) != len(field.get(obj)): - field.set(obj, value) + field.set(obj, unit) yield item From e0b353da51ad49f8d139ac93c319c50b9ba52553 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 10:36:26 +0200 Subject: [PATCH 08/29] add a new blueprint for plonearticle and linguaplone relation --- docs/linguarelation.rst | 38 ++++++++++++++ docs/plonearticle.rst | 114 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 docs/linguarelation.rst create mode 100644 docs/plonearticle.rst diff --git a/docs/linguarelation.rst b/docs/linguarelation.rst new file mode 100644 index 0000000..1f5ebc0 --- /dev/null +++ b/docs/linguarelation.rst @@ -0,0 +1,38 @@ +``collective.blueprint.jsonmigrator.linguarelation`` +==================================================== + +Set linguaplone relaation between contents. + +Configuration options +--------------------- + + +Expected data structure in pipeline: + + * **_canonical**: path of canonical object + + +Example +------- + +This example will try to store content of ``0/1.json-file-1`` + +Configuration:: + + [tranmogrifier] + pipeline = + source + plonearticle + + ... + + [datafields] + blueprint = collective.blueprint.jsonmigrator.linguarelation + +Data in pipeline:: + + { + "_path": "/Plone/index_html-fr", + "_canonical": "/Plone/index_html", + + } diff --git a/docs/plonearticle.rst b/docs/plonearticle.rst new file mode 100644 index 0000000..22b9553 --- /dev/null +++ b/docs/plonearticle.rst @@ -0,0 +1,114 @@ +``collective.blueprint.jsonmigrator.plonearticle`` +================================================== + +Update images, files, links for plone article contents. + +Configuration options +--------------------- + +datafield-separator : os separator in case that export is provided by windows system + +Expected data structure in pipeline: + + * **_plonearticle_attachments**: information of attached files + * **_plonearticle_refs**: information of attached refs + * **_plonearticle_images** : information of attached images + +Option configuration: + + * datafield-separator : src os separator + * path-key : for changing the path key + +Example +------- + +This example will try to store content of ``0/1.json-file-1`` + +Configuration:: + + [tranmogrifier] + pipeline = + source + plonearticle + + ... 
+ + [datafields] + blueprint = collective.blueprint.jsonmigrator.plonearticle + +Data in pipeline:: + + { + "_path": "/Plone/index_html", + "_plonearticle_refs": [ + { + "description": [ + "Missions", + {} + ], + "referencedContent": [ + "125d3b5fd50e0da288bfb1d0751a60f7", + {} + ], + "id": [ + "linkproxy.2011-04-10.5244530114", + {} + ], + "title": [ + "missions", + {} + ] + } + ], + "_plonearticle_attachments": [ + { + "attachedFile": [ + { + + "filename": "Voeux_JPA_VF.doc", + "content_type": "application/msword", + "data": "0\\1.json-file-1", + "size": 29184 + }, + {} + ], + "description": [ + "", + {} + ], + "id": [ + "fileproxy.2011-04-10.5244535753", + {} + ], + "title": [ + "VOEUX 2009 DE J.P AGON", + {} + ] + }, + { + "attachedFile": [ + { + "filename": "IMG_0026 1.JPG", + "content_type": "image/jpeg", + "data": "0\\1.json-file-2", + "size": 1228698 + }, + {} + ], + "description": [ + "", + {} + ], + "id": [ + "fileproxy.2011-04-10.5244539481", + {} + ], + "title": [ + "File.doc", + {} + ] + } + ], + + + } From 553099083ffb7702c48024fd7d1c8896b607d88b Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 10:37:07 +0200 Subject: [PATCH 09/29] update documentation --- docs/datafields.rst | 13 ++++++++++++- docs/index.rst | 2 ++ docs/jsonsource.rst | 11 +++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/datafields.rst b/docs/datafields.rst index aa2bb08..0696fbd 100644 --- a/docs/datafields.rst +++ b/docs/datafields.rst @@ -16,6 +16,12 @@ Expected data structure in pipeline: * **_path**: path to object on which we want to change local roles. * **_datafield_**: field which needs to store data +Option configuration: + + * datafield-prefix : for changing the prefix (by default _datafield_) + * path-key : for changing the path key + * datafield-separator : for changing separator of prefix + Example ------- @@ -38,5 +44,10 @@ Data in pipeline:: { "_path": "/Plone/index_html", - "_datafield_attachment": "0/1.json-file-1", + "_datafield_attachment": {"filename": "DAF.jpg", + "content_type": "image/jpeg", + "path": "0\\20.json-file-1", + "height": 605, + "size": 63912, + } } diff --git a/docs/index.rst b/docs/index.rst index a2ccdd4..9ce440b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -31,6 +31,8 @@ And if you might forgot, migration is a bitch ... so have fun :P owner ac_local_roles datafields + plonearticle + linguarelation .. _`collective.transmogrifier`: http://pypi.python.org/pypi/collective.transmogrifier .. _`Plone`: http://plone.org diff --git a/docs/jsonsource.rst b/docs/jsonsource.rst index e336be7..d578956 100644 --- a/docs/jsonsource.rst +++ b/docs/jsonsource.rst @@ -11,6 +11,12 @@ Parameters Also possible to specify in ``some.package:path/to/json/directory`` way. +:path-separator: + os path separator use in json file (in case of json file is created on windows) + +:datafield-prefix: + prefix for indicate file fields prefix. Path is transformed by this blue print + Example ------- @@ -23,6 +29,8 @@ Configuration:: [source] blueprint = collective.blueprint.jsonmigrator.source path = some.package:/path/to/json/dir + path-separator = \ + datafield-prefix = _data_ JSON files structure:: @@ -43,4 +51,7 @@ JSON file:: "_path": "/Plone/front-page", "_type": "Document", ... 
+ "_data_backgroundImage": { + "path": "0\\20.json-file-1", + }, } From 6626c34d9bc24d75e824947f05d4b4eb0f9f0c24 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 10:37:29 +0200 Subject: [PATCH 10/29] add a new blueprint for plonearticle and plone relation --- collective/blueprint/jsonmigrator/blueprint.py | 10 ---------- collective/blueprint/jsonmigrator/configure.zcml | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 0768e85..4c691f7 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -587,11 +587,6 @@ def __iter__(self): pathkey = self.pathkey(*item.keys())[0] if not pathkey: # not enough info yield item; continue - #if item.get('_path') == 'direction-technique-division/administration-finance': - # import pdb;pdb.set_trace(); - - - obj = self.context.unrestrictedTraverse(item[pathkey].lstrip('/'), None) if obj is None: # path doesn't exist yield item; continue @@ -678,11 +673,6 @@ def __iter__(self): pathkey = self.pathkey(*item.keys())[0] if not pathkey: # not enough info yield item; continue - #if item.get('_path') == 'direction-technique-division/administration-finance': - # import pdb;pdb.set_trace(); - - - obj = self.context.unrestrictedTraverse(item[pathkey].lstrip('/'), None) if obj is None: # path doesn't exist yield item; continue diff --git a/collective/blueprint/jsonmigrator/configure.zcml b/collective/blueprint/jsonmigrator/configure.zcml index 519d3d5..c58be82 100644 --- a/collective/blueprint/jsonmigrator/configure.zcml +++ b/collective/blueprint/jsonmigrator/configure.zcml @@ -57,4 +57,14 @@ name="collective.blueprint.jsonmigrator.datafields" /> + + + + From fb24983a01e2e519cef9c4aa7527c72c8814bbd1 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 10:46:32 +0200 Subject: [PATCH 11/29] fix export for attachment and plone article. 
Add relation for linguaplone --- export_scripts/plone2.0_export.py | 147 +++++++++++++++++++----------- 1 file changed, 94 insertions(+), 53 deletions(-) diff --git a/export_scripts/plone2.0_export.py b/export_scripts/plone2.0_export.py index ab7050e..9d69cda 100644 --- a/export_scripts/plone2.0_export.py +++ b/export_scripts/plone2.0_export.py @@ -9,6 +9,7 @@ ############################################################################### import os +import re import shutil import ConfigParser ### DEPENDENCY 2.0.0 for python2.3 @@ -23,11 +24,14 @@ import logging logger = logging.getLogger('plone20_export') +PAV3_MODEL_RE = re.compile(r'plonearticle_model([\d]*)') + try: #import pdb;pdb.set_trace(); CONFIG.readfp(open(os.path.join(getConfiguration().instancehome, 'jsonmigrator.ini'))) except: + logger.exception('Please specify ini file jsonmigrator.ini') logger.warning('Please specify ini file jsonmigrator.ini in your %s' \ % getConfiguration().instancehome) @@ -133,6 +137,11 @@ def getconf(option, default): JUST_TREAT_WAPPER = False print 'ID_TO_SKIP %s ' % str(ID_TO_SKIP) +try: + MAX_TREAT = int(getconf('MAX_TREAT', 0)) +except: + MAX_TREAT = 0 + try: MAX_CACHE_DB = int(getconf('MAX_CACHE_DB', 500)) except: @@ -178,6 +187,8 @@ def walk(folder): logger.info('>> SKIPPING :: ['+item.__class__.__name__+'] '\ + item.absolute_url()) continue + if MAX_TREAT != 0 and COUNTER >= MAX_TREAT: + continue logger.info('>> TREAT :: ('+ str(COUNTER) +')['+item.__class__.__name__+'] '\ + item.absolute_url()) yield item @@ -209,7 +220,7 @@ def write(items): for x in item.Control_Panel.Database.getDatabaseNames()] except: print "there is an error on %s" % item.absolute_url() - import pdb;pdb.set_trace(); + #import pdb;pdb.set_trace(); raise @@ -227,17 +238,25 @@ def write_to_jsonfile(item): for datafield in item['__datafields__']: datafield_filepath = os.path.join(SUB_TMPDIR, str(COUNTER)+'.json-file-'+str(datafield_counter)) f = open(datafield_filepath, 'wb') - f.write(item[datafield]) - item[datafield] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) + if type(item[datafield]) is dict: + f.write(item[datafield]['data']) + del item[datafield]['data'] + else: + f.write(item[datafield]) + item[datafield] = {} + #f.write(item[datafield]) + item[datafield]['path'] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) + #item[datafield] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) f.close() datafield_counter += 1 item.pop(u'__datafields__') if '_plonearticle_attachments' in item: for item2 in item['_plonearticle_attachments']: + import pdb;pdb.set_trace(); datafield_filepath = os.path.join(SUB_TMPDIR, str(COUNTER)+'.json-file-'+str(datafield_counter)) f = open(datafield_filepath, 'wb') - f.write(item2['attachedFile'][0]) - item2['attachedFile'][0] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) + f.write(item2['attachedFile'][0]['data']) + item2['attachedFile'][0]['data'] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) f.close() datafield_counter += 1 if '_plonearticle_images' in item: @@ -245,10 +264,10 @@ def write_to_jsonfile(item): datafield_filepath = os.path.join(SUB_TMPDIR, str(COUNTER)+'.json-file-'+str(datafield_counter)) f = open(datafield_filepath, 'wb') try: - f.write(item2['attachedImage'][0]) + f.write(item2['attachedImage'][0]['data']) except: import pdb; pdb.set_trace() - item2['attachedImage'][0] = 
os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) + item2['attachedImage'][0]['data'] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) f.close() datafield_counter += 1 @@ -285,21 +304,22 @@ def __init__(self, obj): self.obj = obj self.portal = getToolByName(obj, 'portal_url').getPortalObject() + relative_url = getToolByName(obj, 'portal_url').getRelativeContentURL self.portal_utils = getToolByName(obj, 'plone_utils') self.charset = self.portal.portal_properties.site_properties.default_charset if not self.charset: # newer seen it missing ... but users can change it self.charset = 'utf-8' - + self['__datafields__'] = [] - self['_path'] = '/'.join(self.obj.getPhysicalPath()) - + #self['_path'] = '/'.join(self.obj.getPhysicalPath()) + self['_path'] = relative_url(self.obj) self['_type'] = self.obj.__class__.__name__ self['id'] = obj.getId() self['title'] = safe_decode(obj.title,self.charset, 'ignore') self['description'] = safe_decode(obj.description,self.charset, 'ignore') - self['language'] = obj.language + self['language'] = obj.Language() self['rights'] = safe_decode(obj.rights,self.charset, 'ignore') # for DC attrs that are tuples for attr in ('subject', 'contributors'): @@ -332,10 +352,11 @@ def __init__(self, obj): self['_workflow_history'] = workflow_history # default view + if 'layout' in obj.__dict__: self['_layout'] = obj.__dict__['layout'] try: - _browser = self.portal_utils.browserDefault(aq_base(obj))[1] + _browser = self.plone_utils.browserDefault(aq_base(obj))[1] except: _browser = None if _browser: @@ -378,6 +399,8 @@ def __init__(self, obj): for key, val in obj.__ac_local_roles__.items(): if key is not None: self['_ac_local_roles'][key] = val + if 'Owner' in val: + self['_owner'] = key self['_userdefined_roles'] = () if getattr(aq_base(obj), 'userdefined_roles', False): @@ -401,7 +424,13 @@ def __init__(self, obj): self['_permission_mapping'][perm['name']] = \ {'acquire': not unchecked, 'roles': new_roles} - + + if getattr(aq_base(obj), 'isCanonical', False): + if not obj.isCanonical(): + canonical = obj.getCanonical() + self['_canonical'] = relative_url(canonical) + + # self['_ac_inherited_permissions'] = {} # if getattr(aq_base(obj), 'ac_inherited_permissions', False): # oldmap = getPermissionMapping(obj.ac_inherited_permissions(1)) @@ -409,14 +438,16 @@ def __init__(self, obj): # old_p = Permission(key, values, obj) # self['_ac_inherited_permissions'][key] = old_p.getRoles() - if getattr(aq_base(obj), 'getWrappedOwner', False): - self['_owner'] = (1, obj.getWrappedOwner().getId()) - else: + + + #if getattr(aq_base(obj), 'getWrappedOwner', False): + # self['_owner'] = (1, obj.getWrappedOwner().getId()) + #else: # fallback # not very nice but at least it works # trying to get/set the owner via getOwner(), changeOwnership(...) # did not work, at least not with plone 1.x, at 1.0.1, zope 2.6.2 - self['_owner'] = (0, obj.getOwner(info = 1).getId()) + # self['_owner'] = (0, obj.getOwner(info = 1).getId()) def decode(self, s, encodings=('utf8', 'latin1', 'ascii')): if self.charset: @@ -430,7 +461,6 @@ def decode(self, s, encodings=('utf8', 'latin1', 'ascii')): class DocumentWrapper(BaseWrapper): - """ An wrapper to an Document. 
Object must have an text attribute """ def __init__(self, obj): super(DocumentWrapper, self).__init__(obj) @@ -439,8 +469,7 @@ def __init__(self, obj): class I18NFolderWrapper(BaseWrapper): - """ An wrapper to an I18NFolder """ - + def __init__(self, obj): super(I18NFolderWrapper, self).__init__(obj) # We are ignoring another languages @@ -469,7 +498,6 @@ def __init__(self, obj): class LinkWrapper(BaseWrapper): - """ An wrapper to ATLink """ def __init__(self, obj): super(LinkWrapper, self).__init__(obj) @@ -477,8 +505,6 @@ def __init__(self, obj): class NewsItemWrapper(DocumentWrapper): - """ An wrapper to NewsItem """ - def __init__(self, obj): super(NewsItemWrapper, self).__init__(obj) @@ -486,7 +512,6 @@ def __init__(self, obj): class ListCriteriaWrapper(BaseWrapper): - """ An wrapper to ListCriteria """ def __init__(self, obj): super(ListCriteriaWrapper, self).__init__(obj) @@ -496,7 +521,6 @@ def __init__(self, obj): class StringCriteriaWrapper(BaseWrapper): - """ An wrapper to StringCriteria """ def __init__(self, obj): super(StringCriteriaWrapper, self).__init__(obj) @@ -505,7 +529,6 @@ def __init__(self, obj): class SortCriteriaWrapper(BaseWrapper): - """ An wrapper to SortStringCriteria """ def __init__(self, obj): super(SortCriteriaWrapper, self).__init__(obj) @@ -514,7 +537,6 @@ def __init__(self, obj): class DateCriteriaWrapper(BaseWrapper): - """ An wrapper to DateCriteria """ def __init__(self, obj): super(DateCriteriaWrapper, self).__init__(obj) @@ -525,35 +547,29 @@ def __init__(self, obj): class FileWrapper(BaseWrapper): - """ An wrapper to OFSFile """ - - + ## fs file ## def __init__(self, obj): - super(FileWrapper, self).__init__(obj) self['__datafields__'].append('_datafield_file') data = str(obj.data) if len(data) != obj.getSize(): - raise Exception, 'Problem while extracting data for File content type at '+obj.absolute_url() + raise Exception, 'Problem while extracting data for File content type at '+obj.absolute_url() self['_datafield_file'] = data class ImageWrapper(BaseWrapper): - """ An wrapper to OFSImage """ - + ## fs image ## def __init__(self, obj): - super(ImageWrapper, self).__init__(obj) self['__datafields__'].append('_datafield_image') data = str(obj.data) if len(data) != obj.getSize(): - raise Exception, 'Problem while extracting data for Image content type at '+obj.absolute_url() + raise Exception, 'Problem while extracting data for Image content type at '+obj.absolute_url() self['_datafield_image'] = data class EventWrapper(BaseWrapper): - """ An wrapper to ATEvent """ def __init__(self, obj): super(EventWrapper, self).__init__(obj) @@ -567,7 +583,6 @@ def __init__(self, obj): class ArchetypesWrapper(BaseWrapper): - """ An wrapper to Archetype Object """ def __init__(self, obj): @@ -608,15 +623,22 @@ def __init__(self, obj): else: self[unicode(field.__name__)] = value.absolute_url() elif type_ in ['ImageField', 'FileField', 'AttachmentField']: + #import pdb;pdb.set_trace(); fieldname = unicode('_data_'+field.__name__) value = field.get(obj) value2 = value if type(value) is not str: - value = str(value.data) + try: + value = str(value.data) + except: + import pdb;pdb.set_trace(); if value: self['__datafields__'].append(fieldname) - self[fieldname] = value + self[fieldname] = {} + for x in field.get(obj).__dict__: + self[fieldname][x] = field.get(obj).__dict__[x] + self[fieldname]['data'] = value elif type_ in ['ComputedField']: pass @@ -647,7 +669,6 @@ def _guessFilename(self, data, fname='', mimetype='', default=''): class 
I18NLayerWrapper(ArchetypesWrapper): - """ An wrapper to I18N Archetype Object """ def __init__(self, obj): super(I18NLayerWrapper, self).__init__(obj) @@ -691,11 +712,21 @@ def generateUniqueId(type_name=None): return prefix + time + rand + suffix + +def getNewModelName(model): + re_match = PAV3_MODEL_RE.search(model) + if re_match is not None: + model = 'pa_model%s' % (re_match.group(1) or '1',) + elif model == 'plonearticle_view': + model = 'pa_model1' + return model + + class Article322Wrapper(NewsItemWrapper): - """ An wrapper to Old Plone Article Object (version<4)""" def __init__(self, obj): super(Article322Wrapper, self).__init__(obj) + #(Pdb) self.__ordered_attachment_refs__.getItems() #['4e952a8c3af4b1bcedf38d475ac6049d'] d = {'__ordered_attachment_refs__' : ('_plonearticle_attachments', @@ -711,14 +742,19 @@ def __init__(self, obj): 'LinkProxy', 'attachedLink', 'getRemoteUrl')} - + ## layout + + model = obj.getModel() + self['_layout'] = getNewModelName(model) + + ids = obj.objectIds() for x in d: slot_name = d[x][0] id_name = d[x][1] field_name = d[x][2] accessor = d[x][3] - setattr(self, slot_name, []) + self[slot_name] = [] for refid in getattr(obj,x).getItems(): ref = None try: @@ -737,12 +773,23 @@ def __init__(self, obj): 'ignore'), {}),} if ref.id in ids: ### internal - inner[field_name] = (getattr(ref, accessor)(), {}) + innerfile = getattr(ref, accessor)() + if innerfile: + di = {} + try: + data = str(innerfile.data) + except: + import pdb;pdb.set_trace(); + for x in innerfile.__dict__: + di[x] = innerfile.__dict__[x] + di['data'] = data + inner[field_name] = (di, {}) + else: #### external inner['referencedContent'] = (ref.UID(), {}) - getattr(self, slot_name).append(inner) - + self[slot_name].append(inner) + @@ -750,7 +797,6 @@ def __init__(self, obj): class ArticleWrapper(NewsItemWrapper): - """ An wrapper to Plone Article Object (version>=4)""" def __init__(self, obj): @@ -784,7 +830,6 @@ def __init__(self, obj): class ZPhotoWrapper(BaseWrapper): - """ An wrapper to ZPhoto """ def __init__(self, obj): super(ZPhotoWrapper, self).__init__(obj) @@ -802,7 +847,6 @@ def __init__(self, obj): class ZPhotoSlidesWrapper(BaseWrapper): - """ An wrapper to ZPhotoSlide """ def __init__(self, obj): super(ZPhotoSlidesWrapper, self).__init__(obj) @@ -850,7 +894,6 @@ def __init__(self, obj): class ContentPanels(BaseWrapper): - """ An wrapper to ControlPanel """ def __init__(self, obj): super(ContentPanels, self).__init__(obj) @@ -858,7 +901,6 @@ def __init__(self, obj): class LocalFSWrapper(BaseWrapper): - """ An wrapper to FS object """ def __init__(self, obj): super(LocalFSWrapper, self).__init__(obj) @@ -866,7 +908,6 @@ def __init__(self, obj): class ZopeObjectWrapper(BaseWrapper): - """ An wrapper to Zope object """ def __init__(self, obj): super(ZopeObjectWrapper, self).__init__(obj) From 911a4b70f61e90c47cfc959f272d7edf1feb4bc5 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 16:53:12 +0200 Subject: [PATCH 12/29] prepare release --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 307e19e..a3d1833 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.1.1' +version = '0.2.0' setup(name='collective.blueprint.jsonmigrator', version=version, @@ -13,8 +13,8 @@ "Programming Language :: Python", ], keywords='', - author='', - author_email='', + author='garbas + yboussard', + author_email='y.boussard@alterway.fr', 
url='http://svn.plone.org/svn/collective/', license='GPL', packages=find_packages(exclude=['ez_setup']), From 982d903c4c3103755ac8c41a16bc3d10d7c0e345 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 22:12:48 +0200 Subject: [PATCH 13/29] add zcml to be include when we release --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index f744ce2..deabbd7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ include *.txt recursive-include collective/blueprint/jsonmigrator * recursive-include export_scripts * +global-include *.zcml global-exclude *pyc From b7039707d380c81044e98a50fff2d4638cf9aaa3 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 22:18:33 +0200 Subject: [PATCH 14/29] fix manifest --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a3d1833..eb9d435 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.0' +version = '0.2.1' setup(name='collective.blueprint.jsonmigrator', version=version, From d544abd202b38e5b256d3724c1e0a75ff641e0f3 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 12 Apr 2011 22:22:53 +0200 Subject: [PATCH 15/29] add zcml to be include when we release --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index eb9d435..b31fc6c 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.1' +version = '0.2.2' setup(name='collective.blueprint.jsonmigrator', version=version, From ed539ba6d65c6a398929f27957e32934e9163461 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 19 Apr 2011 12:42:41 +0200 Subject: [PATCH 16/29] add an error step and fix issue for plonearticle --- .../blueprint/jsonmigrator/blueprint.py | 70 ++++++++++++++----- .../blueprint/jsonmigrator/configure.zcml | 7 ++ 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 4c691f7..a638571 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -73,8 +73,10 @@ def __iter__(self): json_file_path = os.path.join(self.path, str(item3), str(item2)+'.json') f = open(json_file_path) - - item = simplejson.loads(f.read()) + try: + item = simplejson.loads(f.read()) + except: + logger.exception('error in reading %s' % json_file_path) item['_json_file_path'] = json_file_path f.close() for key in item.keys(): @@ -180,10 +182,6 @@ def __iter__(self): stat += 'TOTAL TIME: %d; ' % (now - self.stats['START_TIME']) stat += 'STEP TIME: %d; ' % (now - self.stats['TIME_LAST_STEP']) self.stats['TIME_LAST_STEP'] = now - stat += 'EXISTED: %d; ADDED: %d; NOT-ADDED: %d' % ( - self.stats['EXISTED'], - self.stats['ADDED'], - self.stats['NOT-ADDED']) logging.warning(stat) @@ -574,7 +572,7 @@ def __init__(self, transmogrifier, name, options, previous): self.options = options self.previous = previous self.context = transmogrifier.context - + if 'path-key' in options: pathkeys = options['path-key'].splitlines() else: @@ -619,31 +617,69 @@ def getUnit(x, field_name): if '_plonearticle_images' in item and \ len(item['_plonearticle_images']): for (i, x) in enumerate(item['_plonearticle_images']) : - unit = getUnit(x,'attachedImage') - item['_plonearticle_images'][i]['attachedImage']=\ - (unit,{}) - obj.getField('images').set(obj,item['_plonearticle_images']) + if 
'attachedImage' in x: + + unit = getUnit(x,'attachedImage') + item['_plonearticle_images'][i]['attachedImage']=\ + (unit,{}) + try: + obj.getField('images').set(obj,item['_plonearticle_images']) + except: + item['_error'] = item['_json_file_path'] + logger.error('cannot set images for %s %s' % \ + (item['_path'], + item['_json_file_path']) + ) if '_plonearticle_attachments' in item and\ len(item['_plonearticle_attachments']): for (i, x) in enumerate(item['_plonearticle_attachments']): - unit = getUnit(x,'attachedFile') - item['_plonearticle_attachments'][i]['attachedFile'] =\ + if 'attachedFile' in x: + unit = getUnit(x,'attachedFile') + item['_plonearticle_attachments'][i]['attachedFile'] =\ (unit,{}) - obj.getField('files').set(obj, - item['_plonearticle_attachments']) + try: + obj.getField('files').set(obj, + item['_plonearticle_attachments']) + except: + item['_error'] = item['_json_file_path'] + logger.error('cannot set files for %s %s' % \ + (item['_path'], + item['_json_file_path']) + ) if '_plonearticle_refs' in item and \ len(item['_plonearticle_refs']): try: obj.getField('links').set(obj, item['_plonearticle_refs']) except: + item['_error'] = item['_json_file_path'] logger.error('cannot set links for %s %s' % \ (item['_path'], item['_json_file_path']) ) - - + yield item + +class ReportError(object): + """ """ + classProvides(ISectionBlueprint) + implements(ISection) + + def __init__(self, transmogrifier, name, options, previous): + self.transmogrifier = transmogrifier + self.name = name + self.options = options + self.previous = previous + self.context = transmogrifier.context + path = resolvePackageReferenceOrFile(options['path']) + self.error_file = open(path,'w') + + def __iter__(self): + for item in self.previous: + if '_error' in item: + self.error_file.writelines((item['_error'],)) + yield item + class DataFields(object): """ """ diff --git a/collective/blueprint/jsonmigrator/configure.zcml b/collective/blueprint/jsonmigrator/configure.zcml index c58be82..d074ae7 100644 --- a/collective/blueprint/jsonmigrator/configure.zcml +++ b/collective/blueprint/jsonmigrator/configure.zcml @@ -67,4 +67,11 @@ name="collective.blueprint.jsonmigrator.plonearticle" /> + + + + From 439d189e5f2cf551acba092555908b7e13fa4539 Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 19 Apr 2011 13:45:44 +0200 Subject: [PATCH 17/29] fix issue on PA3.2 --- export_scripts/plone2.0_export.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/export_scripts/plone2.0_export.py b/export_scripts/plone2.0_export.py index 9d69cda..6d94ccc 100644 --- a/export_scripts/plone2.0_export.py +++ b/export_scripts/plone2.0_export.py @@ -252,15 +252,18 @@ def write_to_jsonfile(item): item.pop(u'__datafields__') if '_plonearticle_attachments' in item: for item2 in item['_plonearticle_attachments']: - import pdb;pdb.set_trace(); + if not item2.has_key('attachedFile'): + continue datafield_filepath = os.path.join(SUB_TMPDIR, str(COUNTER)+'.json-file-'+str(datafield_counter)) - f = open(datafield_filepath, 'wb') - f.write(item2['attachedFile'][0]['data']) + f = open(datafield_filepath, 'wb') + f.write(item2['attachedFile'][0]['data']) item2['attachedFile'][0]['data'] = os.path.join(str(COUNTER/1000), str(COUNTER)+'.json-file-'+str(datafield_counter)) f.close() datafield_counter += 1 if '_plonearticle_images' in item: for item2 in item['_plonearticle_images']: + if not item2.has_key('attachedImage'): + continue datafield_filepath = os.path.join(SUB_TMPDIR, 
str(COUNTER)+'.json-file-'+str(datafield_counter)) f = open(datafield_filepath, 'wb') try: @@ -275,7 +278,7 @@ def write_to_jsonfile(item): try: simplejson.dump(item, f, indent=4) except: - import pdb;pdb.set_trace(); + raise str(item) f.close() @@ -637,7 +640,8 @@ def __init__(self, obj): self['__datafields__'].append(fieldname) self[fieldname] = {} for x in field.get(obj).__dict__: - self[fieldname][x] = field.get(obj).__dict__[x] + if type(field.get(obj).__dict__[x]) in (int,str): + self[fieldname][x] = field.get(obj).__dict__[x] self[fieldname]['data'] = value elif type_ in ['ComputedField']: @@ -778,10 +782,13 @@ def __init__(self, obj): di = {} try: data = str(innerfile.data) + for x in innerfile.__dict__: + if type(innerfile.__dict__[x]) in (int,str): + di[x] = innerfile.__dict__[x] except: - import pdb;pdb.set_trace(); - for x in innerfile.__dict__: - di[x] = innerfile.__dict__[x] + data = innerfile + + di['data'] = data inner[field_name] = (di, {}) From b356e0cc2130ca39247c0597f0ca450908d08dce Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 19 Apr 2011 13:49:29 +0200 Subject: [PATCH 18/29] prepare release --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b31fc6c..073e822 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.2' +version = '0.2.3' setup(name='collective.blueprint.jsonmigrator', version=version, From a6d9eaa14a7cd91a4cd40652cb11fed63f2d483c Mon Sep 17 00:00:00 2001 From: Boussard Date: Fri, 29 Apr 2011 19:37:11 +0200 Subject: [PATCH 19/29] fix migration --- .../blueprint/jsonmigrator/blueprint.py | 70 ++++++++++++++----- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index a638571..591eaf3 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -4,7 +4,7 @@ import simplejson import logging import transaction - +import shutil from DateTime import DateTime from Acquisition import aq_base from ZODB.POSException import ConflictError @@ -36,7 +36,6 @@ DATAFIELD = '_datafield_' STATISTICSFIELD = '_statistics_field_prefix_' -import logging logger = logging.getLogger('collective.blueprint.jsonmigrator') class JSONSource(object): @@ -609,7 +608,16 @@ def getUnit(x, field_name): instance = obj) f.close() return unit - + + def getReferencedContent(x): + path = x['referencedContent'][0] + ## we try to get content + try: + refobj = self.context.restrictedTraverse(path) + return (refobj.UID(),{}) + except: + item['_error'] = item['_json_file_path'] + logger.exception('we cant set referencedContent for %s' % path) if IPloneArticle.providedBy(obj): @@ -622,14 +630,18 @@ def getUnit(x, field_name): unit = getUnit(x,'attachedImage') item['_plonearticle_images'][i]['attachedImage']=\ (unit,{}) + elif 'referencedContent' in x: + + item['_plonearticle_images'][i]['referencedContent']=getReferencedContent(x) try: obj.getField('images').set(obj,item['_plonearticle_images']) except: item['_error'] = item['_json_file_path'] - logger.error('cannot set images for %s %s' % \ - (item['_path'], - item['_json_file_path']) - ) + #import pdb;pdb.set_trace(); + logger.exception('cannot set images for %s %s' % \ + (item['_path'], + item['_json_file_path']) + ) if '_plonearticle_attachments' in item and\ len(item['_plonearticle_attachments']): @@ -638,26 +650,33 @@ def getUnit(x, field_name): unit = 
getUnit(x,'attachedFile') item['_plonearticle_attachments'][i]['attachedFile'] =\ (unit,{}) + elif 'referencedContent' in x: + item['_plonearticle_images'][i]['referencedContent']=getReferencedContent(x) try: obj.getField('files').set(obj, item['_plonearticle_attachments']) except: item['_error'] = item['_json_file_path'] - logger.error('cannot set files for %s %s' % \ - (item['_path'], - item['_json_file_path']) - ) + #import pdb;pdb.set_trace(); + logger.exception('cannot set files for %s %s' % \ + (item['_path'], + item['_json_file_path']) + ) if '_plonearticle_refs' in item and \ len(item['_plonearticle_refs']): + for (i, x) in enumerate(item['_plonearticle_refs']): + if 'referencedContent' in x: + item['_plonearticle_refs'][i]['referencedContent']=getReferencedContent(x) try: obj.getField('links').set(obj, item['_plonearticle_refs']) except: + item['_error'] = item['_json_file_path'] - logger.error('cannot set links for %s %s' % \ - (item['_path'], - item['_json_file_path']) - ) + logger.exception('cannot set links for %s %s' % \ + (item['_path'], + item['_json_file_path']) + ) yield item class ReportError(object): @@ -672,12 +691,19 @@ def __init__(self, transmogrifier, name, options, previous): self.previous = previous self.context = transmogrifier.context path = resolvePackageReferenceOrFile(options['path']) + self.json = resolvePackageReferenceOrFile(options['json']) self.error_file = open(path,'w') def __iter__(self): for item in self.previous: if '_error' in item: - self.error_file.writelines((item['_error'],)) + self.error_file.write(item['_error'] + "\n") + #shutil.copy(item['_error'], self.json) + path = os.path.dirname(item['_error']) + for x in (x for x in os.listdir(path) \ + if x.startswith(os.path.basename(item['_error']))): + shutil.copy(os.path.join(path, x), self.json) + yield item @@ -732,6 +758,14 @@ def __iter__(self): ) f.close() if len(value) != len(field.get(obj)): - field.set(obj, unit) - + try: + field.set(obj, unit) + except: + item['_error'] = item['_json_file_path'] + logger.exception('cannot set file(%s) for %s %s' % \ + (fieldname, + item['_path'], + item['_json_file_path']) + ) + yield item From e88727a6529ea5c17aa283599533953ba2a619e7 Mon Sep 17 00:00:00 2001 From: Boussard Date: Fri, 29 Apr 2011 21:11:33 +0200 Subject: [PATCH 20/29] Preparing release 0.2.4 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 073e822..af4d180 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.3' +version = '0.2.4' setup(name='collective.blueprint.jsonmigrator', version=version, From 435b47bc148440171b66d2ae90e902a287062632 Mon Sep 17 00:00:00 2001 From: Boussard Date: Fri, 29 Apr 2011 21:12:34 +0200 Subject: [PATCH 21/29] Back to development: 0.2.5 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index af4d180..5d9e9cb 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.4' +version = '0.2.5dev' setup(name='collective.blueprint.jsonmigrator', version=version, From 6e9250d4f1b1e5e0388357b76241290a315b257b Mon Sep 17 00:00:00 2001 From: Boussard Date: Sun, 1 May 2011 21:59:54 +0200 Subject: [PATCH 22/29] fix issue on owner field --- collective/blueprint/jsonmigrator/blueprint.py | 17 +++++------------ docs/owner.rst | 6 ++---- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git 
a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 591eaf3..40671cb 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -374,7 +374,7 @@ def __iter__(self): for item in self.previous: pathkey = self.pathkey(*item.keys())[0] ownerkey = self.ownerkey(*item.keys())[0] - + import pdb;pdb.set_trace(); if not pathkey or not ownerkey or \ ownerkey not in item: # not enough info yield item; continue @@ -385,23 +385,16 @@ def __iter__(self): if IBaseObject.providedBy(obj): - if item[ownerkey][0] and item[ownerkey][1]: + if item[ownerkey]: try: - obj.changeOwnership(self.memtool.getMemberById(item[ownerkey][1])) + obj.changeOwnership(self.memtool.getMemberById(item[ownerkey])) except Exception, e: raise Exception('ERROR: %s SETTING OWNERSHIP TO %s' % (str(e), item[pathkey])) try: - obj.manage_setLocalRoles(item[ownerkey][1], ['Owner']) + obj.manage_setLocalRoles(item[ownerkey], ['Owner']) except Exception, e: raise Exception('ERROR: %s SETTING OWNERSHIP2 TO %s' % (str(e), item[pathkey])) - - elif not item[ownerkey][0] and item[ownerkey][1]: - try: - obj._owner = item[ownerkey][1] - except Exception, e: - raise Exception('ERROR: %s SETTING __OWNERSHIP TO %s' % (str(e), item[pathkey])) - yield item @@ -651,7 +644,7 @@ def getReferencedContent(x): item['_plonearticle_attachments'][i]['attachedFile'] =\ (unit,{}) elif 'referencedContent' in x: - item['_plonearticle_images'][i]['referencedContent']=getReferencedContent(x) + item['_plonearticle_attachments'][i]['referencedContent']=getReferencedContent(x) try: obj.getField('files').set(obj, item['_plonearticle_attachments']) diff --git a/docs/owner.rst b/docs/owner.rst index 309e44f..381e8a3 100644 --- a/docs/owner.rst +++ b/docs/owner.rst @@ -32,10 +32,8 @@ Data in pipeline:: { "_path": "/Plone/index_html", - "_owner": [ - 1, - "admin" - ], + "_owner": "admin" + } From 4776cbcea5fc4f44a33ee5b7edae00865c049d1f Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 10 May 2011 16:37:28 +0200 Subject: [PATCH 23/29] get user via acl_users --- collective/blueprint/jsonmigrator/blueprint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 40671cb..84938c0 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -323,7 +323,6 @@ def __iter__(self): obj = self.context.unrestrictedTraverse(item[pathkey].lstrip('/'), None) if obj is None: # path doesn't exist yield item; continue - if IBaseObject.providedBy(obj): if getattr(aq_base(obj), '_delProperty', False): for prop in item[propertieskey]: @@ -356,6 +355,7 @@ def __init__(self, transmogrifier, name, options, previous): self.options = options self.previous = previous self.context = transmogrifier.context + self.acl_users = getToolByName(self.context, 'acl_users') self.memtool = getToolByName(self.context, 'portal_membership') if 'path-key' in options: @@ -374,7 +374,6 @@ def __iter__(self): for item in self.previous: pathkey = self.pathkey(*item.keys())[0] ownerkey = self.ownerkey(*item.keys())[0] - import pdb;pdb.set_trace(); if not pathkey or not ownerkey or \ ownerkey not in item: # not enough info yield item; continue @@ -387,7 +386,7 @@ def __iter__(self): if item[ownerkey]: try: - obj.changeOwnership(self.memtool.getMemberById(item[ownerkey])) + obj.changeOwnership(self.acl_users.getUserById(item[ownerkey])) 
except Exception, e: raise Exception('ERROR: %s SETTING OWNERSHIP TO %s' % (str(e), item[pathkey])) @@ -486,6 +485,7 @@ def __iter__(self): yield item; continue if IRoleManager.providedBy(obj): + for principal, roles in item[roleskey].items(): if roles: obj.manage_addLocalRoles(principal, roles) From 6224d23831786839aa837ca79e47cb15a3276bdb Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 10 May 2011 17:11:14 +0200 Subject: [PATCH 24/29] Preparing release 0.2.5 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5d9e9cb..94c2cd0 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.5dev' +version = '0.2.5' setup(name='collective.blueprint.jsonmigrator', version=version, From a4d3d32edd6b3b5331937f085788c727afd5cebc Mon Sep 17 00:00:00 2001 From: Boussard Date: Tue, 10 May 2011 17:14:58 +0200 Subject: [PATCH 25/29] Back to development: 0.2.6 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 94c2cd0..18b3504 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages import os -version = '0.2.5' +version = '0.2.6dev' setup(name='collective.blueprint.jsonmigrator', version=version, From 568f34736ac59968aa3de4f4e67c5f5894be8a46 Mon Sep 17 00:00:00 2001 From: Boussard Date: Wed, 11 May 2011 10:59:11 +0200 Subject: [PATCH 26/29] fix creators on export --- export_scripts/plone2.0_export.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/export_scripts/plone2.0_export.py b/export_scripts/plone2.0_export.py index 6d94ccc..966c34b 100644 --- a/export_scripts/plone2.0_export.py +++ b/export_scripts/plone2.0_export.py @@ -332,6 +332,14 @@ def __init__(self, obj): for val in val_tuple: self[attr].append(safe_decode(val,self.charset, 'ignore')) self[attr] = tuple(self[attr]) + # Creators + self['creators'] = [] + val_tuple = obj.Creators() + if val_tuple: + for val in val_tuple: + self['creators'].append(safe_decode(val,self.charset, 'ignore')) + + # for DC attrs that are DateTimes datetimes_dict = {'creation_date': 'creation_date', 'modification_date': 'modification_date', From 387d0d18b2f1998f1ccaff968db906ed8fa4ad65 Mon Sep 17 00:00:00 2001 From: Boussard Date: Wed, 11 May 2011 16:22:54 +0200 Subject: [PATCH 27/29] can modify bmp image --- .../blueprint/jsonmigrator/blueprint.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 84938c0..377cf18 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -1,10 +1,13 @@ import time +import tempfile import os +import os.path import simplejson import logging import transaction import shutil +from PIL import Image from DateTime import DateTime from Acquisition import aq_base from ZODB.POSException import ConflictError @@ -592,8 +595,30 @@ def getUnit(x, field_name): f_path = os.path.join(os.path.dirname(\ item['_json_file_path']), f_name) - f = open(f_path, mode = 'rb') - value = f.read() + + ### + ## type2png = image/x-ms-bmp + value = '' + if x.get('content_type','') in self.options['type2png']: + path = tempfile.mkdtemp() + img = Image.open(f_path) + new_path = os.path.join(path,'image.png') + img.save(new_path) + f = open(new_path, mode = 'rb') + value = f.read() + x['content_type'] = 'image/png' + ext = os.path.splitext(x.get('filename', 
''))[-1] + x['filename'] = x.get('filename','').replace(ext, '.png') + try: + os.unlink(path) + except: + pass + else: + f = open(f_path, mode = 'rb') + value = f.read() + + + unit = BaseUnit(name = name, file = value, mimetype = x.get('content_type', ''), From 13041b51675f7e6fc0cbf29fb5a810c925b9a5a2 Mon Sep 17 00:00:00 2001 From: Boussard Date: Fri, 13 May 2011 15:55:31 +0200 Subject: [PATCH 28/29] normalize group identifier and delete prefix group_ on group name --- collective/blueprint/jsonmigrator/blueprint.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 377cf18..9ac9f7f 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -15,6 +15,9 @@ from zope.interface import implements from zope.interface import classProvides +from plone.i18n.normalizer import idnormalizer + + from collective.transmogrifier.interfaces import ISectionBlueprint from collective.transmogrifier.interfaces import ISection from collective.transmogrifier.utils import Matcher @@ -489,10 +492,16 @@ def __iter__(self): if IRoleManager.providedBy(obj): + if self.options.get('erasebefore'): + obj.__ac_local_roles__ = {} for principal, roles in item[roleskey].items(): if roles: + if principal.startswith(u'group_'): + principal = principal[len(u'group_'):] + principal = idnormalizer.normalize(principal) + obj.manage_addLocalRoles(principal, roles) - obj.reindexObjectSecurity() + obj.reindexObjectSecurity() yield item @@ -599,7 +608,7 @@ def getUnit(x, field_name): ### ## type2png = image/x-ms-bmp value = '' - if x.get('content_type','') in self.options['type2png']: + if x.get('content_type','') in self.options.get('type2png',''): path = tempfile.mkdtemp() img = Image.open(f_path) new_path = os.path.join(path,'image.png') From 66d9051b620ec9b032a2ee5240ab73853368b628 Mon Sep 17 00:00:00 2001 From: Boussard Date: Fri, 13 May 2011 16:51:52 +0200 Subject: [PATCH 29/29] prefix group by group_ --- collective/blueprint/jsonmigrator/blueprint.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/collective/blueprint/jsonmigrator/blueprint.py b/collective/blueprint/jsonmigrator/blueprint.py index 9ac9f7f..91be7cf 100644 --- a/collective/blueprint/jsonmigrator/blueprint.py +++ b/collective/blueprint/jsonmigrator/blueprint.py @@ -497,9 +497,7 @@ def __iter__(self): for principal, roles in item[roleskey].items(): if roles: if principal.startswith(u'group_'): - principal = principal[len(u'group_'):] principal = idnormalizer.normalize(principal) - obj.manage_addLocalRoles(principal, roles) obj.reindexObjectSecurity()